Batch Summary

Expand Code

# Load Libraries
start_time_all <- Sys.time()

options(warn = 1)  # output warnings as they appear for traceability in stdout/stderr record

knitr::opts_chunk$set(echo = TRUE, warning = FALSE) # warnings will go to console

quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}

quiet_library(batchreporter)
quiet_library(Matrix)        # dependency of H5weaver
quiet_library(rhdf5)         # dependency of H5weaver
quiet_library(H5weaver)      # aifi package
quiet_library(HTOparser)     # aifi package
quiet_library(ggplot2)       
quiet_library(dplyr)         # data wrangling
quiet_library(cowplot)       # arranging multiple plots
quiet_library(gt)            # formatted table output
quiet_library(plotly)        # interactive plots
quiet_library(tidyr)         # data wrangling  
quiet_library(jsonlite)      # reading and writing json metadata files
quiet_library(purrr)         
quiet_library(future)        # multi-threading for batch umap creation
quiet_library(future.apply)  # multi-threading for batch umap creation
quiet_library(Seurat)

stm("Starting NGS Batch Report")

stm(paste(c("\t",paste(names(Sys.info()),Sys.info(),sep = ": ")), collapse = "\n\t"))  

Argument Parsing

# give input directory rna-specific name 
if(is.null(params$in_dir)) {
  batch_id <- "X070"
  in_dir <- system.file("extdata/X070", package = "batchreporter")
  in_key <- system.file("extdata/example_sample_key_X070.csv", package = "batchreporter")
  in_config <- system.file("extdata/default_rna_config_v1.csv", package = "batchreporter")
  in_batch_meta <- system.file("extdata/batch-metadata.json", package = "batchreporter")
  in_method_string <- "scrna;scatac;adt;hto"
  out_dir <- tempdir()
} else {
  batch_id <-  params$batch_id  
  in_method_string <- params$in_method
  in_dir <- params$in_dir  
  in_key <- params$in_key  
  in_config <- params$in_config  
  if(is.null(in_config)){
      in_config <- system.file("extdata/default_rna_config_v1.csv", package = "batchreporter")
      stm("No config file provided. Using default_rna_config_v1.csv for pbmc data")
  }
  out_dir <- params$out_dir
  in_batch_meta <- params$in_batch_meta
  if(is.null(in_batch_meta)){in_batch_meta <- NA}
}

stm(paste0("IN Batch        : ", batch_id))
stm(paste0("IN Method       : ", in_method_string))
stm(paste0("IN Directory    : ", in_dir))
stm(paste0("IN Sample Key   : ", in_key))
stm(paste0("IN Config       : ", in_config))
stm(paste0("IN Batch Processing Info : ", in_batch_meta))
stm(paste0("OUT Dir         : ", out_dir))

print(paste0("IN Batch        : ", batch_id))
## [1] "IN Batch        : X070"
print(paste0("IN Method       : ", in_method_string))
## [1] "IN Method       : scrna;scatac;adt;hto"
print(paste0("IN Directory    : ", in_dir))
## [1] "IN Directory    : /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070"
print(paste0("IN Sample Key   : ", in_key))
## [1] "IN Sample Key   : /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/example_sample_key_X070.csv"
print(paste0("IN Config       : ", in_config))
## [1] "IN Config       : /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/default_rna_config_v1.csv"
print(paste0("IN Batch Processing Info : ", in_batch_meta))
## [1] "IN Batch Processing Info : /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/batch-metadata.json"
print(paste0("OUT Dir         : ", out_dir))
## [1] "OUT Dir         : /var/folders/zc/cdsbwh5d60d_2h9c9brv90s80000gp/T//Rtmpo06rqu"

Check input files

if(!dir.exists(in_dir)) {
  stm(paste("ERROR: Cannot find IN results dir:", in_dir))
  stop()
}
if(!file.exists(in_key)) {
  stm(paste("ERROR: Cannot find IN sample key:", in_key))
  stop()
}
if(!file.exists(in_config)) {
  stm(paste("ERROR: Cannot find IN config file:", in_config))
  stop()
}
if(!is.na(in_batch_meta) && !file.exists(in_batch_meta)) {
  stm(paste("ERROR: Cannot find IN batch meta:", in_batch_meta))
  stop()
}
if(!dir.exists(out_dir)) {
  stm(paste("Creating output directory:", out_dir))
  dir.create(out_dir)
}

out_prefix <- file.path(out_dir, paste0(batch_id, "_"))

Read in the sample key

stm("Reading in sample key")
df_key <- data.table::fread(in_key)
has_controls <- any(grepl("Control", df_key$Type))  # used to control evaluation of batch control-related code chunks

assertthat::assert_that(length(unique(df_key$BatchID)) == 1, msg = "More than 1 batch in input sample key file")
## [1] TRUE
assertthat::assert_that(batch_id == unique(df_key$BatchID), msg = "Batch in input key file does not match input batch value")
## [1] TRUE

Determine which modalities streams were run

defined_modalities <- c("scrna", "scatac", "adt", "hto")

# convert method string to vector
in_method <- strsplit(in_method_string, split = ";")[[1]]
in_method <- tolower(in_method)

# Logic check input methods
if(!all(in_method %in% defined_modalities)){
    unknowns <- setdiff(in_method, defined_modalities)
    stop(sprintf("One or more input methods are not in defined modalities: '%s'. Defined modalities are: [%s]. Input methods should be passed as a ';'-delimited string, ie 'scrna;scatac;hto'.",
                    paste(unknowns, collapse = "', '"),
                    paste(defined_modalities, collapse = ', ')))
} 

has_rna <- "scrna" %in% in_method
has_atac <- "scatac" %in% in_method
has_adt <- "adt" %in% in_method
has_hto <- "hto" %in% in_method

Define and check input folder expectations

if(has_rna){
  in_rna <- file.path(in_dir, "scrna")
  if(!dir.exists(in_rna)){
    stop(sprintf("Expected RNA input directory [%s] does not exist.", in_rna))
  }
}

if(has_atac){
  in_atac <- file.path(in_dir, "scatac")
  if(!dir.exists(in_atac)){
    stop(sprintf("Expected ATAC input directory [%s] does not exist.", in_atac))
  }
}

if(has_adt){
  in_adt <- file.path(in_dir, "adt")
  if(!dir.exists(in_adt)){
    stop(sprintf("Expected ADT input directory [%s] does not exist.", in_adt))
  }
}

if(has_hto){
  in_hto <- file.path(in_dir, "hto")
  if(!dir.exists(in_hto)){
    stop(sprintf("Expected HTO input directory [%s] does not exist.", in_hto))
  }
}

Batch Information

stm("Constructing Batch Information table")

# Summarize batch information, also declare some global batch variables that are used throughout the report
pools <- unique(df_key$PoolID)
n_pools <- length(pools)
if(n_pools==1 & grepl("P0$", pools)){
  n_pools_label <- 0
} else {
  n_pools_label <- n_pools
}

wells <- unlist(strsplit(unique(df_key$WellID), split = ";"))
n_wells <- length(wells)

samples <- unique(df_key$SampleID)
n_samples <- length(samples)

controls <- unique(df_key$SampleID[df_key$Type == "Control"])
control_string <- ifelse(has_controls, paste(controls, collapse = ", "), "None")

if (has_controls){
  n_study_samples <- setdiff(samples, controls)
} else {
  n_study_samples <- samples
}

n_study_samples <- length(n_study_samples)

samples_pool <- tapply(df_key$SampleID, df_key$PoolID, unique)
samples_pool_string <- sapply(samples_pool, function(x){paste(x, collapse = ", ")})

labels <- c("Batch","Libraries","N Samples", "N Pools","N Wells","Batch Control", paste0(pools, " Samples"))
values <-  c(batch_id, in_method_string, n_study_samples, n_pools_label, n_wells, control_string, samples_pool_string)

simple_html_table(labels, values, fontsize = 3, col_widths_px = c(175, 850))
Batch X070
Libraries scrna;scatac;adt;hto
N Samples 6
N Pools 1
N Wells 3
Batch Control None
X070-P1 Samples PB1051W10, PB1178W10, PB1194W10, PB2216W10, PB5206W10, PB7626W10

Batch Processing

if(!is.na(in_batch_meta)){
  if(!file.exists(in_batch_meta)) {stop(sprintf("Supplied in_batch_meta file '%s' does not exist. ",in_batch_meta))}
  meta_table <- jsonlite::read_json(in_batch_meta)
  meta_table <- as.data.frame(meta_table)  # collapses any nested names
  
  if(nrow(meta_table) == 1){
    simple_html_table(colnames(meta_table), unlist(meta_table[1,]), fontsize = 3, col_widths_px = c(200,250))
  } else{
    stop(sprintf("in_batch_meta file should have 1:1 key value pairs. Check supplied file '%s' for proper formatting. ",in_batch_meta))
  }
} else {
  cat("No processing details supplied")
}  
cellranger_version CellRanger 4.0
mapping_reference hg38
labeling_method Seurat v.3.0

Cell Hashing

The following data summarizes parsing of hash tag oligos (HTO’s) per well and filtering for singlet cells based on cross-hash doublet/multiplet identification.

Contents

Singlet Summaries (hto-based)

# output warnings
if(length(hto_warning_list) == 0){
    hto_warning_list <- "None"
}

simple_html_table(labels = "File Warnings", values = paste(hto_warning_list, collapse = "<br>"), col_widths_px = c(120, 600))
File Warnings None

Expand Code

# Config
config_list<- batchreporter::load_config(in_config)

hash_key <- config_list$hash_key
sample_column_name <- config_list$sample_column_name

# Sample Key
df_key <- data.table::fread(in_key)

pools <- unique(df_key$PoolID)
n_pools <- length(pools)

wells <- unlist(strsplit(unique(df_key$WellID), split = ";"))
n_wells <- length(wells)

samples <- unique(df_key$SampleID)
n_samples <- length(samples)
# Read in all hto key to match hto names to barcodes
hto_key <- system.file(file.path("reference",hash_key), package = "HTOparser")  # Parameterize this value
in_hto_key <- fread(hto_key, header = FALSE, col.names = c("hto_barcode","hto_name")) %>% 
  mutate(hto_order = as.numeric(gsub("HT","", hto_name))) %>% 
  mutate(hto_name = factor(hto_name, levels = hto_name[order(hto_order)])) %>% # use HT number value to reorder the HT levels
  select(-hto_order)

# Read in all hto json, check expected numbers vs input well info
all_hto_json <- list.files(path = in_hto, 
                           pattern = "hto_processing_metrics.json$", 
                           full.names = TRUE, recursive = TRUE)
n_json <- length(all_hto_json)
if(n_json == 0){
  stop(sprintf("No 'hto_processing_metrics.json' files found in %s", in_hto))
} else if (n_json < n_wells){
  hto_json_warn <- sprintf("Input number of 'hto_processing_metrics.json' files (%s) is fewer than number of wells (%s) in sample key",
                           n_json, n_wells)
  warning(hto_json_warn)
  hto_warning_list <- c(hto_warning_list, hto_json_warn)
}
stm(paste0("IN HTO Processing JSON Files        :\n\t", paste(all_hto_json, collapse = "\n\t")))
cat(paste0("IN HTO Processing JSON Files        :\n\t", paste(all_hto_json, collapse = "\n\t")))
## IN HTO Processing JSON Files        :
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W1_hto_processing_metrics.json
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W2_hto_processing_metrics.json
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W3_hto_processing_metrics.json
# Read in all hto matrix files, check expected numbers vs input well info
all_hto_mat <- list.files(path = in_hto, 
                           pattern = "hto_count_matrix.csv.gz$", 
                           full.names = TRUE, recursive = TRUE)
n_hto_mat <- length(all_hto_mat)
if(n_hto_mat == 0){
  stop(sprintf("No 'hto_count_matrix.csv.gz' files found in %s", in_hto))
} else if (n_hto_mat < n_wells){
  hto_mat_warn <- sprintf("Input number of 'hto_count_matrix.csv.gz' files (%s) is fewer than number of wells (%s) in sample key",
                           n_json, n_wells)
  warning(hto_mat_warn)
  hto_warning_list <- c(hto_warning_list, hto_mat_warn)
}
stm(paste0("IN HTO Matrix Files        :\n\t", paste(all_hto_mat, collapse = "\n\t")))  
cat(paste0("IN HTO Matrix Files        :\n\t", paste(all_hto_mat, collapse = "\n\t")))  
## IN HTO Matrix Files        :
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W1_hto_count_matrix.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W2_hto_count_matrix.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W3_hto_count_matrix.csv.gz
# Read in all hto metadata files, check expected numbers vs input well info
all_hto_meta <- list.files(path = in_hto, 
                           pattern = "hto_category_table.csv.gz$", 
                           full.names = TRUE, recursive = TRUE)
n_hto_meta <- length(all_hto_meta)
if(n_hto_meta == 0){
  stop(sprintf("No 'hto_category_table.csv.gz' files found in %s", in_hto))
} else if (n_hto_meta < n_wells){
  hto_meta_warn <- sprintf("Input number of 'hto_category_table.csv.gz' files (%s) is fewer than number of wells (%s) in sample key",
                           n_json, n_wells)
  warning(hto_meta_warn)
  hto_warning_list <- c(hto_warning_list, hto_meta_warn)
}
stm(paste0("IN HTO Metadata Files        :\n\t", paste(all_hto_meta, collapse = "\n\t")))
cat(paste0("IN HTO Metadata Files        :\n\t", paste(all_hto_meta, collapse = "\n\t")))
## IN HTO Metadata Files        :
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W1_hto_category_table.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W2_hto_category_table.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W3_hto_category_table.csv.gz
# output warnings
if(length(hto_warning_list) == 0){
    hto_warning_list <- "None"
}

simple_html_table(labels = "File Warnings", values = paste(hto_warning_list, collapse = "<br>"), col_widths_px = c(120, 600))
stm("Reading in hto metadata .csv files")
hto_meta_list <- lapply(all_hto_meta, fread)
hto_meta_wells <- gsub("_.*","",basename(all_hto_meta))
hto_meta_list <- mapply(function(x,y){x$well_id <- y; x}, hto_meta_list, hto_meta_wells, SIMPLIFY = F)
hto_meta <- do.call(rbind, hto_meta_list)
rm("hto_meta_list")

hto_meta[in_hto_key, on = 'hto_barcode', hto_name := i.hto_name]  # merge in the hto names
hto_meta[ , pool_id := gsub("C\\dW\\d","", well_id)]
hto_meta[ , sample_hto:= sprintf("%s\n%s", hto_name, get(sample_column_name))]
hto_meta[ , sample_hto_pool:= sprintf("%s\n%s%s", hto_name, get(sample_column_name), pool_id)]
hto_meta[ , hto_order:=  as.numeric(gsub("HT","", hto_name))]
hto_meta[ , sample_hto_pool:=  factor(sample_hto_pool, levels = unique(sample_hto_pool[order(pool_id, hto_order)]))]
hto_meta[ , hto_order:= NULL]
hto_meta[ , hto_category:= factor(hto_category, levels = c("no_hash", "singlet", "doublet", "multiplet"))]
# hto_meta_list <- split(hto_meta, hto_meta$well_id)

hto_meta_singlet <- subset(hto_meta, hto_category=="singlet")
# Read in json files
stm("Reading in hto processing metrics json files")
well_hto_json_list <- lapply(all_hto_json, read_hto_well_json)
well_hto_json_df <- do.call(rbind, well_hto_json_list)
well_hto_json_df <- well_hto_json_df %>% 
  left_join(in_hto_key, by = "hto_barcode") %>% 
  dplyr::mutate(pool_id_short = gsub(".*-", "", gsub("C.*","", well_id))) %>% 
  dplyr::mutate(pool_id = gsub("C.*","", well_id)) %>% 
  dplyr::mutate(sample_hto = paste(hto_name, get(sample_column_name), sep = "\n")) %>% 
  dplyr::mutate(sample_hto = factor(sample_hto, levels = unique(sample_hto[order(hto_name, pool_id)]))) %>% 
  dplyr::mutate(hto_name_barcode = paste(hto_name, hto_barcode, sep = "\n")) %>%  
  dplyr::mutate(hto_name_barcode = factor(hto_name_barcode, levels = unique(hto_name_barcode[order(hto_name)]))) %>%
  dplyr::mutate(sample_pool = paste(get(sample_column_name), pool_id_short, sep = "_")) %>% 
  dplyr::mutate(sample_pool_hto = paste(sample_pool, hto_barcode, sep = "\n")) 

remove("well_hto_json_list")
stm("Reading in hash matrixes from labeled h5 and multiplet h5 files")
# read in as data.table, convert to data frame with hto barcodes as rownames
hto_mat_list <- lapply(all_hto_mat, fread)

stm("Formatting hto cutoff data")
hto_mat_list <- lapply(hto_mat_list, data.frame, row.names=1)
# name the list items by well
hto_mat_wells <- gsub("_.*", "", basename(all_hto_mat))
names(hto_mat_list) <-  hto_mat_wells
# Join with metadata before merging well in case some cell barcodes are not unique
hto_df_list <- lapply(hto_mat_wells, function(x){
  mat <- data.frame(t(hto_mat_list[[x]]))
  setDT(mat, keep.rownames = "barcodes")
  to_melt <- setdiff(colnames(mat), "barcodes")
  mat <- melt(mat, id.vars ="barcodes", variable.name ="hto", 
                     value.name= "count",measure.vars = to_melt)
  mat <- mat[hto_meta, on = c("barcodes"= "cell_barcode"), `:=`(pool_id= i.pool_id, well_id=i.well_id)]
})
all_hash_fmt <- do.call(rbind, hto_df_list)
# Merge in metadata
all_hash_fmt <- all_hash_fmt[hto_meta[,.(cell_barcode, well_id, pool_id, hto_barcode, hto_category)], 
                             on =c("barcodes"="cell_barcode","well_id"="well_id","pool_id"="pool_id")]
# Merge in well/hto level hashing info
all_hash_fmt <- all_hash_fmt[well_hto_json_df, on = c(hto="hto_barcode", well_id="well_id", pool_id = "pool_id")]

# Add variables for plotting hto distributions
all_hash_fmt[, log_count := ifelse(count>0, log10(count), NA)] 
all_hash_fmt[, log_cutoff := ifelse(is.na(log_count), NA, log10(cutoff))] 
all_hash_fmt[, log_count_bin := cut(log_count, 
                                      breaks = seq(min(log_count,na.rm=T), 
                                                   max(log_count,na.rm=T), 
                                                   length.out = 30))] 
# Format labels as "hash not detected" for any well/hash that has no cells with hash count >5
low_hto_cutoff <- 5
all_hash_fmt[, n_count := sum(count > low_hto_cutoff), by = .(well_id, hto)]
all_hash_fmt[, detected := n_count > 0] 
all_hash_fmt[, plot_label := ifelse(detected, "", "Hash not detected")] 

all_hash_fmt[, log_count := ifelse(detected, log_count, NA)]   # censor counts for plotting if overall hash not detected
all_hash_fmt[, log_cutoff := ifelse(detected, log_cutoff, NA)]  # censor cutoffs for plotting if overall hash not detected 

pools <- unique(all_hash_fmt$pool_id)
n_pools <- length(pools)

Hash Tag UMI

HTO UMI counts have been pre-filtered based on cell barcode. Only barcodes associated with cells called by CellRanger are included.

Pool HTO UMI Counts

stm("Output total hto umi per pool")
pool_info <- all_hash_fmt %>%
  dplyr::group_by(pool_id) %>%
  dplyr::summarize(total_hto_umi = formatC(sum(count), big.mark = ",", digits = 0, format = "f"), .groups = "drop")
  
pool_info %>%
  gt::gt() %>%
  gt::cols_align(align = "right", columns = 2)
pool_id total_hto_umi
X070-P1 47,366,047

Well HTO UMI Counts

stm("Output barplot total hto umi per pool")
hash_by_well <- all_hash_fmt[,.(hto_count=sum(count)), 
                             by = .(pool_id, well_id, hto, sample_pool_hto, hto_name_barcode)]

g <- ggplot(hash_by_well, aes(well_id, hto_count)) +
  geom_bar(stat="identity", fill = "blue") +
  theme_bw() +
  scale_y_continuous(labels = scales::comma) +
  guides(fill=guide_legend(title="HTO")) +
  theme(axis.text.x = element_text(angle = 90, vjust=0.5)) +
  xlab("Wells") +
  ylab("Total HTO UMI") +
  facet_wrap(~pool_id, ncol = n_pools, scales = "free_x", drop = TRUE)

tempwidth <- max(n_samples*0.5 + n_pools*0.1 + 0.2, 4)


make_subchunk(g, subchunk_name = "hto_totalumi_subchunk",quiet_knit = T,
              chunk_opt_list = list(fig.width = tempwidth, fig.height = 3))
(function () {    g})()

Return to Contents

Well HTO UMI Counts by HTO

stm("Output barplot total hto umi per pool per hto")
g2 <- qc_aligned_barplot_facet(hash_by_well, 
                               category_x = "well_id", 
                               name_x = "Wells", 
                               category_y = "hto_name_barcode", 
                               category_name = "hto", 
                               stat = "identity", 
                               variable_y_identity = "hto_count", 
                               name_y = "HTO UMI Counts", 
                               facet_formula = formula("~pool_id"), 
                               scales = "free_x", 
                               drop = TRUE)

tempwidth2 <- n_samples*0.5 + n_pools*0.1 + 2.2

make_subchunk(g2, subchunk_name = "hto_umibyhto_subchunk",quiet_knit = T,
              chunk_opt_list = list(fig.width = tempwidth, fig.height = 6))
(function () {    g})()

Return to Contents

HTO Parsing

Showing number of cells positively associated with each hash tag for each well. Includes singlets and multiplets.

HTO Positive Cells per Well

stm("Calculations for heatmap positive hto per well")
# Expected HTO per well based on input sample sheet
all_wells <- trimws(unique(unlist(strsplit(df_key$WellID, split = ";"))))
all_hto <- in_hto_key$hto_name
df_expected_hto <- expand.grid(well_id = all_wells, hto_name = all_hto) %>% 
  left_join(in_hto_key, by = "hto_name")
df_expected_hto$hto_expected <- mapply(function(x, y){any(grepl(x, df_key$WellID[df_key$HashTag == y]))},
                                   df_expected_hto$well_id, 
                                   df_expected_hto$hto_name)  


# N Cells Pos HTO per well, defined as cells above cutoff
df_expected_hto$n_positive_cells <- mapply(function(x, y){
      well_hto_json_df$n_pos[which(well_hto_json_df$well_id == x & well_hto_json_df$hto_name == y)]
    },
    df_expected_hto$well_id, 
    df_expected_hto$hto_name)
df_expected_hto$n_positive_cells[df_expected_hto$n_positive_cells <= 5] <- NA  # change to NA to color undetected tiles uniquely
if(is.list(df_expected_hto$n_positive_cells)){
  b_missing <- sapply(df_expected_hto$n_positive_cells, function(x){length(x)==0})
  b_not_missing <- !b_missing
  v_n_pos <- numeric(nrow(df_expected_hto))
  v_n_pos[b_not_missing] <- unlist(df_expected_hto$n_positive_cells[b_not_missing])
  v_n_pos[b_missing] <- NA
  df_expected_hto$n_positive_cells <- v_n_pos
}

# Flag for cells not detected
df_expected_hto$hto_detection_status <-  mapply(function(x, y){
      ifelse(x == TRUE & is.na(y),"Missing",
             ifelse(x == TRUE & !is.na(y), "Detected",
             ifelse(x == FALSE & is.na(y), "Not Used", NA)))
    },
    df_expected_hto$hto_expected, 
    df_expected_hto$n_positive_cells)

# Heat Map 
# temp_scale <- scales::rescale(c(0, 5, 1, max(df_expected_hto$n_positive_cells)))
stm("Output heatmap positive hto per well")
hm_pos <- ggplot(df_expected_hto, aes(hto_name, y=factor(well_id, levels = sort(unique(well_id), decreasing = TRUE)))) +
  geom_tile(aes(fill = n_positive_cells, color = factor(hto_detection_status, levels = c("Not Used", "Detected","Missing"))), 
            width = 0.7, height = 0.7, size = 1) +
  scale_fill_viridis_c(option = "D", na.value = "grey") +
  scale_color_manual(name = "hto_detection_status", breaks = c("Not Used", "Detected","Missing"), values = c("grey","black","red"), drop = FALSE) +
  scale_x_discrete(position = "top") +
  xlab("HTO") +
  ylab("Well") +
  facet_grid(substr(well_id,1,7) ~., drop = T, scales = "free_y")

if(!exists("n_wells")){
  wells <- unlist(strsplit(unique(df_key$WellID), split = ";"))
  n_wells <- length(wells)
}

temp_figheight <- n_wells*0.25 + 1
temp_figwidth <- length(unique(df_expected_hto$hto_name))*0.4 + 2

make_subchunk(hm_pos, subchunk_name="hto_positive_matrix_subchunk", quiet_knit = T, 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth))
(function () {    g})()

Return to Contents

HTO Distributions

A specific hashtag oligo (HTO) is detected in a well when >5 cells have any raw counts. Calculated cutoffs separate cells with background HTO count levels from cells considered positive for the HTO.

stm("Plotting hto count ridgeplots")

get_plt_ht_temp <- function(n_well_pool, n_hto, title_label_height = 0.5, row_height_constant = 3){
    plot_height <- (n_well_pool*0.4 + row_height_constant)*ceiling(n_hto/7) + title_label_height
    return(plot_height)
}

# Censor hto if detected in <= 5 cells in the well
for (i in seq_along(pools)){
  # pool data
  df <- all_hash_fmt %>% 
    dplyr::filter(pool_id == pools[i]) %>% 
    dplyr::mutate(well_id = factor(well_id, levels = sort(unique(well_id), decreasing=T))) 
  
  # cutoff line segment
  df_lines <- df %>%
    dplyr::select(well_id, sample_hto, hto, cutoff, log_cutoff)  %>% 
    dplyr::filter(!is.na(log_cutoff)) %>% 
    dplyr::distinct() 
  
  # labels for hash not detected
  detected_labels <- df %>% 
    dplyr::select(well_id, hto, sample_hto, plot_label) %>% 
  dplyr::distinct()
  
  x_max <- max(all_hash_fmt$log_count, na.rm = T)
  x_min <- -0.1
  x_label <- (x_max - x_min)/2 + x_min

  # Plot
  g_ridge <- ggplot(df, aes(x = log_count, y = well_id)) +
    ggridges::geom_density_ridges(
                                  scale = 7, 
                                  stat = "binline",
                                  binwidth = 0.1,
                                  size = 0.5, # line width
                                  aes(color = well_id), alpha = 0,
                                  na.rm = TRUE) +
    geom_segment(data = df_lines, aes(x = log_cutoff, xend = log_cutoff,
                                      y = as.numeric(well_id), yend = as.numeric(well_id) + 0.9,
                                      linetype = "well cutoff"), 
                 color = "black", na.rm = TRUE) +
    scale_x_continuous(limits = c(x_min, x_max)) +
    scale_y_discrete(expand = c(0.01, 0)) +
    scale_linetype_manual("cutoff",values=c("well cutoff"=1)) +
    geom_text(data = detected_labels, x= x_label, 
              aes(y= as.numeric(well_id),label = plot_label), 
              size = 4, vjust = 0) +
    facet_wrap(~sample_hto, ncol = 7) +
    ggtitle(pools[i]) +
    xlab("log10(Count)") +
    ylab("Well ID") +
    theme_bw() +
    theme(axis.text.y = element_text(size = 12),
          axis.title = element_text(size = 18),
          plot.title = element_text(size = 24),
          strip.text = element_text(size = 12),
          legend.text = element_text(size = 12))
  
  # Set Height
  temp_height <- get_plt_ht_temp(n_well_pool = length(unique(df$well_id)), n_hto = length(unique(df$hto)))
  
  # Output plot with custom chunk format
  batchreporter::make_subchunk(g_ridge, subchunk_name = paste0("ridge_", i), 
                chunk_opt_list = list(fig.height = temp_height, fig.width = 18, warning = FALSE), 
                quiet_knit = TRUE)
}
(function () {    g})()

Return to Contents

HTO Cutoffs

stm("Plotting hto cutoff boxplots")
g_cutoff_box <- ggplot(well_hto_json_df, aes(sample_hto, cutoff, color = hto_barcode)) +
  suppressWarnings(geom_point(alpha = 0.4, position = position_jitter(height = 0, width = 0.3, seed = 20201112),
             aes(text = sprintf("Well ID: %s",  well_id)))) +
  geom_boxplot(alpha = 0, outlier.alpha = 1, color = "black") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(~pool_id, drop = TRUE, scales = "free_x", ncol = 2)+
  ggtitle("Well-Specific HTO Cutoffs") 

temp_figwidth <- (4*((n_pools>1)+1) + 3)
temp_figheight <- ceiling(n_pools/2)*5 + 0.4

ply_cutoff_box <- plotly::ggplotly(g_cutoff_box) %>% 
  adjust_axis_title_spacing_plotly("x", adjustment = -1.3/temp_figheight) %>%
  adjust_axis_title_spacing_plotly("y", adjustment = -0.5/temp_figwidth) %>% 
  plotly::layout(xaxis = list(title = list(text = "", standoff = 30L)),
                 yaxis = list(title = list(text = "", standoff = 20L))) 

make_subchunk(ply_cutoff_box, subchunk_name = "hto_assignment_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Top

HTO Positivity Rate

stm("Plotting hto positive call box plots")
g_cutoff_pos <- ggplot(well_hto_json_df, aes(sample_hto, frac_pos, color = hto_barcode)) +
  suppressWarnings(geom_point(alpha = 0.4, position = position_jitter(height = 0, seed = 20201112),
             aes(text = sprintf("Well ID: %s",  well_id)))) +
  geom_boxplot(alpha = 0, outlier.alpha = 1, color = "black") +
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90),
                axis.title.x = element_text(margin = margin(c(15,0,1,0)))) +
  facet_wrap(~pool_id, drop = TRUE, scales = "free_x", ncol = 2)+
  ggtitle("Fraction Positive HTO Calls per Well")

temp_figwidth <- (4*((n_pools>1)+1) + 3)
temp_figheight <- ceiling(n_pools/2)*5 + 0.4

ply_cutoff_pos <- plotly::ggplotly(g_cutoff_pos) %>% 
  adjust_axis_title_spacing_plotly("x", adjustment = -1.3/temp_figheight) %>%
  adjust_axis_title_spacing_plotly("y", adjustment = -0.5/temp_figwidth) %>% 
  plotly::layout(xaxis = list(title = list(text = "", standoff = 30L)),
                 yaxis = list(title = list(text = "", standoff = 20L))) 

make_subchunk(ply_cutoff_pos, subchunk_name = "hto_positive_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

HTO Category Summary

Pool Hashing Summary

stm("Output pool based summary table")
pool_summary <- hto_meta[ ,.(n_cells = .N),
                          by = .(pool_id, hto_category)]
setorder(pool_summary, pool_id, hto_category)
pool_summary[, pct_cells := round(n_cells/sum(n_cells)*100,1), by = pool_id]
setcolorder(pool_summary, c("pool_id", "hto_category", "n_cells", "pct_cells"))

qc_table(pool_summary)

Return to Contents

HTO Category Counts by Well

stm("Plotting hto category counts per well")

g <- qc_aligned_barplot_facet(hto_meta,
                   category_x = "well_id",
                   name_x = "Well ID",
                   category_y = "hto_category",
                   category_name = "HTO Category",
                   colorset_y = "varibow",
                   name_y = "N Cells",
                   padding = 0.2, 
                   facet_formula = formula("~pool_id"), nrow = 1, scales = "free_x")

temp_figwidth <-  max(0.3*n_wells + 0.5*n_pools + 1.5, 4)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
make_subchunk(g, subchunk_name = "hto_cat_counts_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

HTO Category Fraction by Well

stm("Plotting hto category fraction per well")

g <- qc_stacked_barplot_facet(hto_meta,
                       category_x = "well_id",
                       name_x = "Well ID",
                       category_y = "hto_category",
                       category_name = "HTO Category",
                       colorset_y = "varibow",
                       name_y = "Fraction of Cells",
                       as_fraction = TRUE, facet_formula = formula("~pool_id"), ncol=2, scales = "free_x") 

temp_figwidth <- max(0.3*n_wells + 0.5*n_pools + 1.5, 4)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
make_subchunk(g, subchunk_name = "hto_cat_fraction_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Singlet Summaries (well-based)

Well-Based Summary

Expand data table of summary stats by well

well_summary <- hto_meta[ ,.(n_cells = .N),
                          by = .(pool_id, well_id, hto_category)]
well_summary[, pct_cells := round(n_cells/sum(n_cells)*100,1), by = c("pool_id", "well_id")]
setorder(well_summary, pool_id, well_id, hto_category)

qc_table(well_summary)

Return to Contents

HTO Barcode Count per Well Plot

stm("Plotting barcode count per well and hto")

g <- qc_aligned_barplot_facet(meta = hto_meta_singlet,
                   category_x = "well_id",
                   name_x = "Well ID",
                   category_y = "sample_hto_pool",
                   category_name = "HTO/Sample",
                   colorset_y = "varibow",
                   name_y = "Number of Cells",
                   padding = 0.2,
                   facet_formula = formula("~pool_id"), nrow = 1, scales = "free", drop = TRUE) 

# g <- cowplot::plot_grid(plotlist = plot_list, ncol = 2)

temp_figwidth <- max(0.3*n_wells + 0.5*n_pools + 2, 4)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
batchreporter::make_subchunk(g, subchunk_name = "hto_cat_count_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

HTO Barcode Fraction per Well Plot

stm("Plotting barcode fraction per well and hto")

g <- qc_stacked_barplot_facet(meta = hto_meta_singlet,
                   category_x = "well_id",
                   name_x = "Well ID",
                   category_y = "sample_hto_pool",
                   category_name = "HTO/Sample",
                   colorset_y = "varibow",
                   name_y = "Fraction of Cells",
                   as_fraction = TRUE, 
                   facet_formula = formula("~pool_id"), nrow = 1, scales = "free_x") +
    theme(text = element_text(size = 12))

temp_figwidth <- max(0.3*n_wells + 0.5*n_pools + 1.5, 4)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
batchreporter::make_subchunk(g, subchunk_name = "hto_fraction_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Singlet Summaries (hash-based)

Hash-Based Summary

Expand data table of summary stats by hash tag

stm("Output pool based summary table")
hto_singlet_summary <- hto_meta_singlet[ ,.(n_singlet_cells = .N),
                             by = setNames(list(pool_id, hto_name, hto_barcode, get(sample_column_name)), c('pool_id', 'hto_name','hto_barcode', sample_column_name))]
setorder(hto_singlet_summary, pool_id, hto_name)
setcolorder(hto_singlet_summary, c("pool_id",sample_column_name, "hto_name", "hto_barcode", "n_singlet_cells"))

qc_table(hto_singlet_summary)

Return to Contents

Well Count per HTO Barcode Plot

stm("Plotting well count per hto")

plot_list <- list()
for (i in seq_along(pools)){
  plot_list[[i]] <- qc_aligned_barplot_facet(meta = hto_meta_singlet[pool_id == pools[i],],
                   category_x = "sample_hto_pool",
                   category_y = "well_id",
                   category_name = "Well ID",
                   name_x = "HTO/Sample",
                   colorset_y = "varibow",
                   name_y = "Number of Cells",
                   facet_formula = formula("~pool_id"), nrow = 1, scales ="free_x", drop = TRUE) 
}

g <- cowplot::plot_grid(plotlist = plot_list, ncol = 2)
temp_figwidth <- max(0.6*n_wells + 1*n_pools + 3, 8)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
batchreporter::make_subchunk(g, subchunk_name = "well_count_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE, class.output = "superbigimage"), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Well Fraction per HTO Barcode Plot

stm("Plotting well fraction per hto")

g <- qc_stacked_barplot_facet(meta = hto_meta_singlet,
                   category_x = "sample_hto_pool",
                   category_y = "well_id",
                   category_name = "Well ID",
                   name_x = "HTO/Sample",
                   colorset_y = "varibow",
                   name_y = "Fraction of Cells",
                   as_fraction = TRUE , 
                   facet_formula = formula("~pool_id"), nrow = 1, scales ="free_x", drop = TRUE)

temp_figwidth <- max(0.3*n_wells + 0.5*n_pools + 1.5, 4)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
batchreporter::make_subchunk(g, subchunk_name = "well_fraction_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents


HTO batch module v.1.0.0, Lauren Okada

scRNA Well

The following metrics summarize the sequencing and alignment by 10x well prior to un-hashing and hash-based cell filtering.

Contents

Data Quality UMAP

Expand Code

Check Dependencies

Reading in well info from h5 files

if(!exists("well_info")){
  # Merge Well Data
  stm("Reading in labeled h5 file well data")
  well_list <- lapply(all_h5, read_h5_well_meta)
  
  # Find common columns
  cols_keep <- Reduce(intersect, lapply(well_list, colnames))
  well_list <- lapply(well_list, function(x){x[,cols_keep]})
  
  well_info <- unique(do.call(rbind, well_list))
  setDT(well_info)
  well_info[, pool_id := gsub("C\\d+W\\d+", "", well_id)]
  
  remove("well_list")
}

Reading in metadata from h5 files

stm("Reading and merging all rna meta data")
rna_meta_list <- lapply(all_h5, H5weaver::read_h5_cell_meta)

# make sure column names are the same
col_list <- lapply(rna_meta_list, colnames)
all_cols_identical <- length(unique(col_list)) == 1
if(!all_cols_identical){
  all_columns <- unique(unlist(lapply(rna_meta_list, colnames)))
  common_columns <- Reduce(union, lapply(rna_meta_list, colnames))
  if(!all(all_columns %in% common_columns)){
    stm(sprintf("Warning: rna h5 files do not contain the same meta data columns. Keeping only the common columns. Removing columns %s.", 
        paste(setdiff(all_columns, common_columns), sep = ", ")))
  }
  rna_meta_list <- lapply(rna_meta_list, function(x){x[, common_columns]})
}

# merge metadata
rna_meta <- do.call(rbind, rna_meta_list)

# add metadata variables
rna_meta$fct_mito_umi <- rna_meta$n_mito_umi/rna_meta$n_umis
fct_mito_grp_cutoffs <- c(-Inf, 0.05, 0.10, 0.20, 0.30,Inf)
fct_mito_grp_labels <- c("0-0.05","0.05-0.10","0.10-0.20","0.20-0.30",">0.30")
rna_meta$fct_mito_group <- cut(rna_meta$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
                           labels = fct_mito_grp_labels)

Read in Counts from H5 Files

stm("Reading and merging all rna count matrices")

rna_count_list <- lapply(all_h5, H5weaver::read_h5_dgCMatrix, target = "matrix", 
                         feature_names = "id")
# make sure all matrices have same number of rows
if(!length(unique(sapply(rna_count_list, nrow)))==1){
  stop("RNA count matrixes have different numbers of rows")
} 

# make sure rows are in same order
row_order <- rownames(rna_count_list[[1]])
rna_count_list <- lapply(rna_count_list, function(x){x[row_order,]})

# make sure columns are in same order as metadata
order_check <- mapply(function(x, y){(all(x$barcodes==colnames(y)))}, rna_meta_list, rna_count_list)
if(!all(order_check)){
  # Reorder matrix columns to be consistent with metadata
  rna_count_list <- mapply( function(x, y){x[,y$barcodes]}, rna_count_list, rna_meta_list)
}

# merge
rna_counts <- do.call(cbind, rna_count_list)

featDF <- read_h5_feature_meta(all_h5[1])

Sample cells from each well to generate umaps

n_cells_sample <- 2000

stm(sprintf("Sampling %s cells per well (or all cells if fewer)", n_cells_sample))

set.seed(3)
sample_index_list <- lapply(rna_meta_list, function(x){
  sample_size = min(n_cells_sample, nrow(x))
  sort(sample(1:nrow(x), size = sample_size, replace = FALSE))
})

n_files <- length(sample_index_list)
rna_meta_list_sampled <- lapply(1:n_files, function(x){
 rna_meta_list[[x]][sample_index_list[[x]],]
})
rna_count_list_sampled <- lapply(1:n_files, function(x){
 rna_count_list[[x]][,sample_index_list[[x]]]
})

rna_meta_sampled <- do.call(rbind, rna_meta_list_sampled)
rna_meta_sampled$fct_mito_umi <- rna_meta_sampled$n_mito_umi/rna_meta_sampled$n_umis
rna_meta_sampled$fct_mito_group <- cut(rna_meta_sampled$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
                           labels = fct_mito_grp_labels)
rna_counts_sampled <- do.call(cbind, rna_count_list_sampled)

rm(list=c("rna_count_list", "rna_count_list_sampled","rna_meta_list"))
vnames_rna <- c("estimated_number_of_cells", "fraction_reads_in_cells", 
            "mean_reads_per_cell", "median_genes_per_cell", "median_umi_counts_per_cell", 
            "number_of_reads", "q30_bases_in_barcode", "q30_bases_in_rna_read", 
            "q30_bases_in_sample_index", "q30_bases_in_umi", "reads_mapped_antisense_to_gene", 
            "reads_mapped_confidently_to_exonic_regions", "reads_mapped_confidently_to_genome", 
            "reads_mapped_confidently_to_intergenic_regions","reads_mapped_confidently_to_intronic_regions", 
            "reads_mapped_confidently_to_transcriptome", "reads_mapped_to_genome", 
            "sequencing_saturation", "total_genes_detected", "valid_barcodes")

vnames_arc <- c("estimated_number_of_cells",
  "gex_fraction_of_transcriptomic_reads_in_cells","gex_mean_raw_reads_per_cell","gex_median_genes_per_cell",
  "gex_median_umi_counts_per_cell","gex_percent_duplicates","gex_q30_bases_in_barcode",
  "gex_q30_bases_in_read_2","gex_q30_bases_in_sample_index_i1","gex_q30_bases_in_sample_index_i2",
  "gex_q30_bases_in_umi","gex_reads_mapped_antisense_to_gene","gex_reads_mapped_confidently_to_exonic_regions",
  "gex_reads_mapped_confidently_to_genome","gex_reads_mapped_confidently_to_intergenic_regions", "gex_reads_mapped_confidently_to_intronic_regions",
  "gex_reads_mapped_confidently_to_transcriptome","gex_reads_mapped_to_genome","gex_reads_with_tso",
  "gex_sequenced_read_pairs","gex_total_genes_detected","gex_valid_barcodes", "gex_valid_umis")  

if(all(vnames_rna %in% colnames(well_info))){
  vnames <- vnames_rna
  vlabels <- c("Estimated Number of Cells", "Fraction Reads in Cells", 
            "Mean Reads per Cell", "Median Genes per Cell", "Median UMI per Cell", 
            "Number of Reads", "Q30 Bases in Barcode (%)", "Q30 Bases in RNA Read (%)", 
            "Q30 Bases in Sample Index (%)", "Q30 Bases in UMI (%)", "Reads Mapped Antisense to Gene (%)", 
            "Reads Mapped Confidently to Exonic Regions (%)", "Reads Mapped Confidently to Genome (%)", 
            "Reads Mapped Confidently to Intergenic Regions (%)",
            "Reads Mapped Confidently to Intronic Regions (%)", 
            "Reads Mapped Confidently to Transcriptome (%)", "Reads Mapped to Genome (%)", 
            "Sequencing Saturation (%)", "Total Genes Detected", "Valid Barcodes (%)")
  n_vars <- length(vnames)
  vartypes <- c(rep("Cells", 5), rep("Sequencing", 5), rep("Mapping", 7),"Sequencing","Cells","Sequencing")
  vartypes <- factor(vartypes,  levels = c("Cells", "Sequencing", "Mapping"))

  digitsRound <- c(0, 1, rep(0, 4), rep(1, 12), 0, 1)
  rna_data_type <- "rna"
} else if (all(vnames_arc %in% colnames(well_info))){
  vnames <- vnames_arc
  vlabels <- c("Estimated Number of Cells","Fraction Transcriptomic Reads in Cells",
            "Mean Raw Reads per Cell", "Median Genes per Cell", "Median UMI per Cell", "Percent Duplicates",
            "Q30 Bases in Barcode", "Q30 Bases in RNA Read 2", "Q30 Bases in Sample Index i1",
            "Q30 Bases in Sample Index i2", "Q30 Bases in UMI", "Reads Mapped Antisense to Gene", 
            "Reads Mapped Confidently to Exonic Regions", "Reads Mapped Confidently to Genome", 
            "Reads Mapped Confidently to Intergenic Regions", 
            "Reads Mapped Confidently to Intronic Regions", 
            "Reads Mapped Confidently to Transcriptome", "Reads Mapped to Genome", "Reads with TSO",
            "Sequenced Read Pairs", "Total Genes Detected", "Valid Barcodes", "Valid UMIs") 
  n_vars <- length(vnames)
  vartypes <- c(rep("Cells", 5), rep("Sequencing", 6), rep("Mapping", 7),"Sequencing","Sequencing","Cells","Sequencing","Sequencing")
  digitsRound <- c(0, 3, 0, 0, 0, rep(3, 14), 0, 0, 3, 3)
  

    rna_data_type <- "arc"
} else {
    vnames <- setdiff( colnames(well_info), c("well_id","pool_id"))
    vlabels <- gsub("_"," ", vnames)
    vartypes <- rep(NA, length(vnames))
    
    getDigits <- function(vNumeric){
      vChar <- as.character(vNumeric)
      is_decimal <- any(grepl("[.]", vChar))
      if(is_decimal){
        decimal_digits <- nchar(vChar) - (nchar(gsub("[.].*","", vChar))+1)
        n_digits <- max(decimal_digits)
      } else {
        n_digits <- 0
      }
      n_digits
    }
    digitsRound <- sapply(subset(well_info, select = vnames), getDigits)
    
    rna_data_type <- "unknown"
}

  df_vars <- data.frame(Category = vartypes,
                        Variable_name = vlabels,
                        Variable = vnames,
                        Round = digitsRound)

Pool Summary

Summary by pool or by entire non-hashed batch

stm("Creating scrna well cellranger summary table")

if(rna_data_type == "rna"){
  pool_info <- well_info %>%
  dplyr::group_by(pool_id) %>%
  dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"),
            total_reads = formatC(sum(number_of_reads), big.mark = ",", digits = 0, format = "f"), .groups = "drop")
} else if (rna_data_type == "arc"){
  pool_info <- well_info %>%
  dplyr::group_by(pool_id) %>%
  dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"),
            total_sequenced_read_pairs = formatC(sum(gex_sequenced_read_pairs), big.mark = ",", digits = 0, format = "f"), .groups = "drop")
} else {
  print("Warning: No total reads column detected in well metadata")
  pool_info <- well_info %>%  
  dplyr::group_by(pool_id) %>%  
  dplyr::summarize(total_cells = formatC(sum(estimated_number_of_cells), big.mark = ",", digits = 0, format = "f"), 
                   total_reads = NA, .groups = "drop")
}

names(pool_info) <- stringr::str_to_title(gsub("_", " ", names(pool_info)))
pool_info %>%
  gt::gt() %>%
  gt::cols_align(align = "right", columns = 2:3)

Pool Id Total Cells Total Sequenced Read Pairs
X070-P1 53,132 2,081,986,539
Return to Contents

Detailed Well Summary

stm("Creating detailied scrna well cellranger table")

unique_pools <- sort(unique(well_info$pool_id))  

well_summary_table <- well_info %>% 
  gather(key = Variable, value = Value, all_of(vnames)) %>%   # all variables long
  full_join(df_vars, by = "Variable") %>%  
  group_by(pool_id, Category, Variable, Variable_name) %>% 
  summarize(Median = formatC(median(Value, na.rm=T), big.mark = ",", digits = unique(Round), format = "f"),
            Range = get_range(Value, digits = unique(Round), verbose = F), 
            `CV%` = round(sd(Value)/mean(Value)*100, 1),
            .groups = "drop") %>% 
  arrange(Category, Variable_name) %>% 
  tidyr::pivot_wider(id_cols = c("Category", "Variable", "Variable_name"), 
                     names_from = pool_id, 
                     values_from = c("Median", "Range", "CV%"),
                     names_glue = "{pool_id}__{.value}",
                     names_sort = TRUE) %>%
  mutate(Plot = sprintf("[Plot](#%s)", Variable)) %>% 
  select(Category, Variable_name, contains(unique_pools), Plot) # reorder cols by pools first then stats, keep only the clean var name

gt_table <- well_summary_table %>% 
  gt::gt() %>% 
  gt::fmt_markdown(columns = "Plot") %>% # convert the plot column text to markdown to activate links. these links generated with plots below.
  gt::cols_width(vars(Category) ~ px(100),
                 vars(Variable_name) ~ px(150),
                 ends_with("Median") ~ px(100),
                 ends_with("Range") ~ px(100),
                 ends_with("CV%") ~ px(50),
                 vars(Plot) ~ px(60)) %>% 
  gt::tab_options(table.font.size = 11, column_labels.font.size = 12) %>%
  gt::tab_spanner_delim(delim = "__") %>% 
  gt::cols_align(align = "right",
                 columns = c(ends_with("Median"), ends_with("Range"))) %>% 
  gt::cols_align(align = "center",
                 columns = vars(Plot)) %>% 
  gt::cols_label(Variable_name = "Variable")

gt_table
Category Variable X070-P1 Plot
Median Range CV%
Cells Estimated Number of Cells 17,743 17,190-18,199 2.9
Cells Fraction Transcriptomic Reads in Cells 0.885 0.880-0.886 0.4
Cells Mean Raw Reads per Cell 39,754 35,614-42,379 8.7
Cells Median Genes per Cell 1,992 1,962-2,050 2.2
Cells Median UMI per Cell 4,130 4,048-4,297 3.1
Cells Total Genes Detected 30,628 30,599-30,756 0.3
Mapping Reads Mapped Antisense to Gene 0.166 0.165-0.171 1.7
Mapping Reads Mapped Confidently to Exonic Regions 0.392 0.384-0.393 1.2
Mapping Reads Mapped Confidently to Genome 0.905 0.904-0.905 0.1
Mapping Reads Mapped Confidently to Intergenic Regions 0.071 0.071-0.072 0.3
Mapping Reads Mapped Confidently to Intronic Regions 0.442 0.441-0.449 1.0
Mapping Reads Mapped Confidently to Transcriptome 0.659 0.655-0.661 0.5
Mapping Reads Mapped to Genome 0.968 0.968-0.970 0.1
Sequencing Percent Duplicates 0.775 0.758-0.781 1.5
Sequencing Q30 Bases in Barcode 0.958 0.956-0.959 0.2
Sequencing Q30 Bases in RNA Read 2 0.927 0.922-0.930 0.4
Sequencing Q30 Bases in Sample Index i1 0.974 0.940-0.974 2.0
Sequencing Q30 Bases in Sample Index i2 0.955 0.942-0.958 0.9
Sequencing Q30 Bases in UMI 0.959 0.957-0.959 0.1
Sequencing Reads with TSO 0.119 0.116-0.130 6.1
Sequencing Sequenced Read Pairs 705,357,243 648,135,670-728,493,626 6.0
Sequencing Valid Barcodes 0.940 0.939-0.941 0.1
Sequencing Valid UMIs 0.999 0.999-0.999 0.0

Expand table of statistics per well

qc_table(well_info)

Return to Contents

Plots of Well-Level Metrics

stm("Generating sequencing and alignment QC plots")

verpal <- hcl.colors(n = n_vars, palette = "viridis")

# Plots
for (i in seq_along(vnames)){
  df <- data.table::copy(well_info)
  spec <- vnames[i]
  slabel <- vlabels[i]
  df <- as.data.frame(df)
  df$spec_col <- df[,spec]
  med_val <- median(df$spec_col)
  cv <- round(sd(df$spec_col)/mean(df$spec_col)*100, 2)
  n <- sum(!is.na(df$spec_col))
  
  g <- ggplot(df, aes(well_id, spec_col)) +
    geom_bar(stat = "identity", fill = verpal[i]) + 
    geom_hline(yintercept = med_val, linetype = "dashed", color = "red")+
    scale_y_continuous(sec.axis = dup_axis(breaks = med_val, labels = med_val, name = ""))+
    xlab("Well") +
    ylab(slabel) +
    facet_wrap(~pool_id, ncol = n_pools, scales = "free_x", drop = TRUE) +
    ggtitle(slabel, 
            subtitle = sprintf("Median=%s    CV=%.1f%%    N=%s", med_val, cv, n)) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
  
  # Plot-specific hyperlink definition
  cat(sprintf('\n<a id="%s"></a>', spec), labels = "", sep = "\n")
  
  # Output plot
  suppressWarnings(print(g))
  
  # Link back to top of section
  cat("  \n[Return to Contents](#rna_seq_well_top)", labels = "", sep = "\n")
  
}


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents


Return to Contents

Count Stats per Well

Read Counts by Well

# Reads per hto cat
stm("Generating read count violin plots")

# Reads per hto plot
g_read <- qc_violin_plot(rna_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_reads",
                        name_y = "N Reads per Cell",
                        log_y = TRUE,
                        fill = "dodgerblue") +
  ggtitle("Reads per Well")

temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4

batchreporter::make_subchunk(g_read, subchunk_name = "well_read_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

UMI Counts by Well

# Reads per hto cat
stm("Generating umi count violin plots")

# UMI per hto plot
g_umi <- qc_violin_plot(rna_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_umis",
                        name_y = "N UMIs per Cell",
                        log_y = TRUE,
                        fill = "purple") +
  ggtitle("UMIs per Well")

temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4

batchreporter::make_subchunk(g_umi, subchunk_name = "well_umi_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Gene Counts by Well

# Reads per hto cat
stm("Generating gene count violin plots")

# Reads per hto plot
g_genes <- qc_violin_plot(rna_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_genes",
                        name_y = "N Genes per Cell",
                        log_y = TRUE,
                        fill = "orangered") +
  ggtitle("Genes per Well")

temp_figwidth = max(5, 0.5 + n_wells*0.4)
temp_figheight = 4

batchreporter::make_subchunk(g_genes, subchunk_name = "well_gene_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Mitochondrial UMIs

Fraction Mitochondrial UMI

g_bar <- ggplot(rna_meta, aes(well_id,  fill = factor(fct_mito_group, 
                                                      levels = rev(fct_mito_grp_labels)))) +
  geom_bar() +
  labs(fill = "Fraction Mitochondrial UMIs") +
  ylab("N Cells") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

g_pl <- plotly::ggplotly(g_bar) %>%
  layout(hovermode = 'compare')

tempwidth <- n_samples*0.4 + 2

make_subchunk(g_pl, subchunk_name = "fraction_mito_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = tempwidth, fig.height = 6, echo = FALSE))

Return to Contents

Fraction Mito UMIs by UMI Counts

stm("Generating mt umi vs umi count scatter plots")

# Reads per hto plot
g_mito_umi <- ggplot(rna_meta, aes(n_umis, fct_mito_umi)) +
    geom_point(color = "purple", alpha = 0.2) +
    xlab("UMI per Cell (log10 scale)") +
    ylab("Fraction Mitochondrial UMI") +
    scale_x_log10() +
    ggtitle("Fraction Mitochondrial UMI vs UMI per Cell") 
g_mito_umi

Expand Per-Well Plot

stm("Generating mt umi vs umi count scatter plots by well")

n_wells_plot <- length(unique(rna_meta$well_id))
ncols_plot <- 6
nrows_plot <- ceiling(n_wells_plot/ncols_plot)

# Reads per hto plot
g_mito_umi_well <- g_mito_umi +
    facet_wrap(~well_id, ncol = ncols_plot)

temp_figwidth <- 2*min(n_wells_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3

make_subchunk(g_mito_umi_well, subchunk_name = "fraction_mito_umis_well_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))

Return to Contents

Fraction Mito UMIs by Gene Counts

stm("Generating mt umi vs gene count scatter plots")

# Reads per hto plot
g_mito_gene <- ggplot(rna_meta, aes(n_genes, fct_mito_umi)) +
    geom_point(color = "orangered", alpha = 0.2) +
    xlab("Genes per Cell (log10 scale)") +
    ylab("Fraction Mitochondrial UMI") +
    scale_x_log10() +
    ggtitle("Fraction Mitochondrial UMI vs UMI per Cell") 
g_mito_gene

Expand Per-Well Plot

stm("Generating mt umi vs gene count scatter plots by well")

n_wells_plot <- length(unique(rna_meta$well_id))
ncols_plot <- 6
nrows_plot <- ceiling(n_wells_plot/ncols_plot)

# Reads per hto plot
g_mito_gene_well <- g_mito_gene +
    facet_wrap(~well_id, ncol = ncols_plot)

temp_figwidth <- 2*min(n_wells_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3

make_subchunk(g_mito_gene_well, subchunk_name = "fraction_mito_genes_well_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))

Return to Contents

Data Quality UMAP

Expand code

# Create Seurat object
stm("Creating Seurat object from merged data for scRNA UMAP")
merged_so <- Seurat::CreateSeuratObject(counts = rna_counts_sampled)

# Normalize data
stm("Normalizing data for scRNA UMAP")
merged_so <- Seurat::NormalizeData(object = merged_so,
                                   normalization.method = "LogNormalize",
                                   scale.factor = 10000,
                                   margin = 1)

normCounts <- merged_so[["RNA"]]@data
pc_dims <- min(ncol(merged_so) - 1, 50)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))

stm("Finding Variable Features for scRNA UMAP")
merged_so <- FindVariableFeatures(object = merged_so)

stm("Scaling Data for scRNA UMAP")
merged_so <- ScaleData(object = merged_so, verbose = FALSE)

stm("Running PCA for scRNA UMAP")
merged_so <- RunPCA(object = merged_so, npcs = pc_dims, verbose = FALSE)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))

stm("Determining dimensionality via jackstraw method for scRNA UMAP")

labels_order <- rna_meta_sampled$well_id[match(colnames(merged_so),rna_meta_sampled$barcodes)]
names(labels_order) <- colnames(merged_so)

jackstraw_cells <- sample_cells(labels_order, 500, seed = 3030)

jackstraw_so <- merged_so[, jackstraw_cells]

jackstraw_so <- JackStraw(object = jackstraw_so,
                         dims = pc_dims,
                         num.replicate = 50,  
                         verbose = FALSE)
jackstraw_so <- ScoreJackStraw(object = jackstraw_so,
                              dims = 1:pc_dims)

pc_pvals <- jackstraw_so@reductions$pca@jackstraw$overall.p.values[,2]
good_pcs <- pc_pvals < 0.05

nPC <- sum(good_pcs)

pc_var <- Stdev(merged_so, reduction = "pca")^2
total_var <- merged_so@reductions$pca@misc$total.variance
var_selected_pc <- sum(pc_var[good_pcs])/total_var
cumvar_string <- sprintf(fmt = "%.1f", var_selected_pc*100)

stm(sprintf("Selected %s significant pcs via JackStraw, %s%% explained variation for scRNA UMAP", nPC, cumvar_string))

pc_embeddings <- merged_so@reductions$pca@cell.embeddings

# stm(sprintf("Using %s principal components for umap",nPC))
# Run UMAP
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))
stm(sprintf("Running UMAP on selected coordinates for scRNA UMAP"))

merged_so <- Seurat::RunUMAP(merged_so,
                                   dims = c(1:50)[good_pcs],
                                   umap.method = "uwot",
                                   seed.use = 3,
                                   verbose = FALSE)
# Get UMAP coordinates
umapDF <- merged_so[["umap"]]@cell.embeddings %>%
          as.data.frame() %>%
          dplyr::rename(UMAP_1_merged = UMAP_1, UMAP_2_merged = UMAP_2) %>%
          tibble::rownames_to_column(var = "barcodes")
umapDF <- merge(umapDF, rna_meta_sampled, by = "barcodes")
rownames(umapDF) <- umapDF$barcodes

umapDF <- umapDF[rownames(merged_so@"meta.data"), , drop = F]

normCounts <- merged_so[["RNA"]]@data
if(!all(rownames(umapDF) == colnames(normCounts))){
  stop("Merged UMAP dataframe does not match columns in normalized counts")
}

Batch-level UMAP of 2000 randomly sampled cells per well using 34 principal components (21.3% explained variance) selected by jackstraw.

stm("Plotting Batch scRNA UMAP by Data Quality Metrics")

# Gene scaling
max_genes <- max(8000, rna_meta$n_genes)
max_umi <- max(60000, rna_meta$n_umis)
point_size <- 0.2


# Cell Types
g_base <- ggplot(umapDF, aes(UMAP_1_merged, UMAP_2_merged))

# fraction mitochondrial umi
stm("Plotting Fraction Mito UMAP")
g1 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "Fraction Mitochondrial UMIs",
                 point_size = point_size,
                 color_col = "fct_mito_umi",
                 scale_color_fun = scale_color_fct_mito)

# N Genes
stm("Plotting N Genes UMAP")
g2 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "N Genes",
                 point_size = point_size,
                 color_col = "n_genes",
                 scale_color_fun = scale_color_genes(max_genes)
)

# N UMIs
stm("Plotting N UMIs UMAP")
g3 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "N UMIs",
                 point_size = point_size,
                 color_col = "n_umis",
                 scale_color_fun = scale_color_umis(max_umi)
)


# Wells
stm("Plotting Well UMAP")
cols_well <- H5weaver::varibow(n_colors = length(unique(umapDF$well_id)))

scale_color_well <- function(...){
    scale_color_manual(values = cols_well, ...)
}
g4 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "Well ID",
                 point_size = point_size,
                 color_col = "well_id",
                 scale_color_fun = scale_color_well
) +
  guides(colour = guide_legend(override.aes = list(size = 3)))


aligned_plots <- cowplot::align_plots(g1, g2, g3, g4, align = "hv", axis = "tblr")  # uniform plot sizing

cowplot::plot_grid(aligned_plots[[1]],
                   aligned_plots[[2]],
                   aligned_plots[[3]],
                   aligned_plots[[4]],
                   ncol = 2)

Return to Contents


scRNA seq report well module v.1.0.0, Lauren Okada

scRNA Sample

scrna_seq_sample_module_version <- "1.0.0"

Contents

Data Quality UMAP

Expand Code

# Allow display of large images without shrinking to page width
<style>
  .superbigimage{
      overflow-x:scroll;
  }

  .superbigimage img{
     max-width: none;
  }

</style>
    

Check analysis dependencies

# Config
config_list<- batchreporter::load_config(in_config)

hash_key <- config_list$hash_key
sample_column_name <- config_list$sample_column_name

# Sample Key
df_key <- data.table::fread(in_key)

pools <- unique(df_key$PoolID)
n_pools <- length(pools)

wells <- unlist(strsplit(unique(df_key$WellID), split = ";"))
n_wells <- length(wells)

samples <- unique(df_key$SampleID)
n_samples <- length(samples)

Reading in metadata from h5 files

stm("Reading and merging all rna meta data")
rna_meta_list <- lapply(all_h5, H5weaver::read_h5_cell_meta)

# make sure column names are the same
col_list <- lapply(rna_meta_list, colnames)
all_cols_identical <- length(unique(col_list)) == 1
if(!all_cols_identical){
  all_columns <- unique(unlist(lapply(rna_meta_list, colnames)))
  common_columns <- Reduce(union, lapply(rna_meta_list, colnames))
  if(!all(all_columns %in% common_columns)){
    stm(sprintf("Warning: rna h5 files do not contain the same meta data columns. Keeping only the common columns. Removing columns %s.", 
        paste(setdiff(all_columns, common_columns), sep = ", ")))
  }
  rna_meta_list <- lapply(rna_meta_list, function(x){x[, common_columns]})
}

# merge metadata
rna_meta <- do.call(rbind, rna_meta_list)

# add metadata variables
rna_meta$fct_mito_umi <- rna_meta$n_mito_umi/rna_meta$n_umis
fct_mito_grp_cutoffs <- c(-Inf, 0.05, 0.10, 0.20, 0.30,Inf)
fct_mito_grp_labels <- c("0-0.05","0.05-0.10","0.10-0.20","0.20-0.30",">0.30")
rna_meta$fct_mito_group <- cut(rna_meta$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
                           labels = fct_mito_grp_labels)

Read in Counts from H5 Files

stm("Reading and merging all rna count matrices")

rna_count_list <- lapply(all_h5, H5weaver::read_h5_dgCMatrix, target = "matrix", 
                         feature_names = "id")
# make sure all matrices have same number of rows
if(!length(unique(sapply(rna_count_list, nrow)))==1){
  stop("RNA count matrixes have different numbers of rows")
} 

# make sure rows are in same order
row_order <- rownames(rna_count_list[[1]])
rna_count_list <- lapply(rna_count_list, function(x){x[row_order,]})

# make sure columns are in same order as metadata
order_check <- mapply(function(x, y){(all(x$barcodes==colnames(y)))}, rna_meta_list, rna_count_list)
if(!all(order_check)){
  # Reorder matrix columns to be consistent with metadata
  rna_count_list <- mapply( function(x, y){x[,y$barcodes]}, rna_count_list, rna_meta_list)
}

# merge
rna_counts <- do.call(cbind, rna_count_list)

featDF <- read_h5_feature_meta(all_h5[1])

Read in HTO files

stm("Reading in hto key for hashed RNA analysis")
# Read in all hto key to match hto names to barcodes
hto_key <- system.file(file.path("reference",hash_key), package = "HTOparser")  # Parameterize this value
in_hto_key <- fread(hto_key, header = FALSE, col.names = c("hto_barcode","hto_name")) %>% 
  mutate(hto_order = as.numeric(gsub("HT","", hto_name))) %>% 
  mutate(hto_name = factor(hto_name, levels = hto_name[order(hto_order)])) %>% # use HT number value to reorder the HT levels
  select(-hto_order)

# Read in all hto metadata files, check expected numbers vs input well info
all_hto_meta <- list.files(path = in_hto, 
                           pattern = "hto_category_table.csv.gz$", 
                           full.names = TRUE, recursive = TRUE)
n_hto_meta <- length(all_hto_meta)
if(n_hto_meta == 0){
  stop(sprintf("No 'hto_category_table.csv.gz' files found in %s", in_hto))
} else if (n_hto_meta < n_wells){
  hto_meta_warn <- sprintf("Input number of 'hto_category_table.csv.gz' files (%s) is fewer than number of wells (%s) in sample key",
                           n_json, n_wells)
  warning(hto_meta_warn)
  hto_warning_list <- c(hto_warning_list, hto_meta_warn)
}
stm(paste0("IN HTO Metadata Files        :\n\t", paste(all_hto_meta, collapse = "\n\t")))
cat(paste0("IN HTO Metadata Files        :\n\t", paste(all_hto_meta, collapse = "\n\t"))) 
## IN HTO Metadata Files        :
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W1_hto_category_table.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W2_hto_category_table.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W3_hto_category_table.csv.gz
stm("Reading in hto category metadata for hashed RNA analysis")
hto_meta_list <- lapply(all_hto_meta, fread)
hto_meta_wells <- gsub("_.*","",basename(all_hto_meta))
hto_meta_list <- mapply(function(x,y){x$well_id <- y; x}, hto_meta_list, hto_meta_wells, SIMPLIFY = F)
hto_meta <- do.call(rbind, hto_meta_list)
rm("hto_meta_list")

Add in HTO Metadata

stm("Formatting in hto category metadata for hashed RNA analysis")
hto_meta[in_hto_key, on = 'hto_barcode', hto_name := i.hto_name]  # merge in the hto names
hto_meta[ , pool_id := gsub("C\\dW\\d","", well_id)]
hto_meta[ , sample_hto:= sprintf("%s\n%s", hto_name, get(sample_column_name))]
hto_meta[ , sample_hto_pool:= sprintf("%s\n%s%s", hto_name, get(sample_column_name), pool_id)]
hto_meta[ , hto_order:=  as.numeric(gsub("HT","", hto_name))]
hto_meta[ , sample_hto_pool:=  factor(sample_hto_pool, levels = unique(sample_hto_pool[order(pool_id, hto_order)]))]
hto_meta[ , hto_order:= NULL]
hto_meta[ , hto_category:= factor(hto_category, levels = c("no_hash", "singlet", "doublet", "multiplet"))]

# hto_meta_singlet <- subset(hto_meta, hto_category=="singlet")
stm("Merging hto category metadata with RNA metadata")

rna_hto_meta <- rna_meta %>%
    left_join(hto_meta, by = c("original_barcodes"="cell_barcode", "well_id" = "well_id","pool_id"="pool_id"))

hto_meta_singlet <- subset(rna_hto_meta, hto_category=="singlet")
hto_meta_singlet <- as.data.table(hto_meta_singlet)
hto_meta_singlet_list <- split(hto_meta_singlet, by = sample_column_name)

Sample singlet cells from each well and sample to generate umaps

n_cells_sample <- 2000

stm(sprintf("Sampling %s cells per sample and well (or all cells if fewer)", n_cells_sample))

set.seed(3)
sample_index_list <- lapply(hto_meta_singlet_list, function(x){
  sample_size = min(n_cells_sample, nrow(x))
  sort(sample(1:nrow(x), size = sample_size, replace = FALSE))
})

n_files <- length(sample_index_list)
rna_meta_list_sampled <- lapply(1:n_files, function(x){
 hto_meta_singlet_list[[x]][sample_index_list[[x]],]
})  

rna_count_list_sampled <- lapply(1:n_files, function(x){
 rna_counts[, rna_meta_list_sampled[[x]]$barcodes]
})

rna_meta_sampled <- do.call(rbind, rna_meta_list_sampled)
rna_meta_sampled$fct_mito_umi <- rna_meta_sampled$n_mito_umi/rna_meta_sampled$n_umis
rna_meta_sampled$fct_mito_group <- cut(rna_meta_sampled$fct_mito_umi, breaks =fct_mito_grp_cutoffs,
                           labels = fct_mito_grp_labels)
rna_counts_sampled <- do.call(cbind, rna_count_list_sampled)

rm(list=c("rna_count_list", "rna_count_list_sampled","rna_meta_list"))

Hash-Based Summary

stm("Output pool based summary table")
hto_singlet_summary <- hto_meta_singlet[ ,.(n_singlet_cells = .N,
                             median_reads = median(n_reads),
                             median_umis = median(n_umis),
                             median_genes = median(n_genes)), 
                             by = setNames(list(pool_id, hto_name, hto_barcode, get(sample_column_name)), c('pool_id', 'hto_name','hto_barcode', sample_column_name))]
                          # by = .(pool_id, hto_name, hto_barcode, get(sample_column_name))]
setorder(hto_singlet_summary, pool_id, hto_name)
setcolorder(hto_singlet_summary, c("pool_id",sample_column_name, "hto_name", "hto_barcode", "n_singlet_cells", 
                            "median_reads", "median_umis", "median_genes"))

qc_table(hto_singlet_summary)

Return to Contents

Well Count per HTO Barcode Plot

stm("Plotting well count per hto")

plot_list <- list()
for (i in seq_along(pools)){
  plot_list[[i]] <- qc_aligned_barplot_facet(meta = hto_meta_singlet[pool_id == pools[i],],
                   category_x = "sample_hto_pool",
                   category_y = "well_id",
                   category_name = "Well ID",
                   name_x = "HTO/Sample",
                   colorset_y = "varibow",
                   name_y = "Number of Cells",
                   facet_formula = formula("~pool_id"), nrow = 1, scales ="free_x", drop = TRUE) 
}

g <- cowplot::plot_grid(plotlist = plot_list, ncol = 2)
temp_figwidth <- max(0.6*n_wells + 1*n_pools + 3, 8)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
batchreporter::make_subchunk(g, subchunk_name = "scrna_sample_hto_count_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE, class.output = "superbigimage"), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Well Fraction per HTO Barcode Plot

stm("Plotting well fraction per hto")

g <- qc_stacked_barplot_facet(meta = hto_meta_singlet,
                   category_x = "sample_hto_pool",
                   category_y = "well_id",
                   category_name = "Well ID",
                   name_x = "HTO/Sample",
                   colorset_y = "varibow",
                   name_y = "Fraction of Cells",
                   as_fraction = TRUE , 
                   facet_formula = formula("~pool_id"), nrow = 1, scales ="free_x", drop = TRUE)

temp_figwidth <- max(0.3*n_wells + 0.5*n_pools + 1.5, 4)
temp_figheight <- ceiling(n_pools/2)*4 + 0.4
batchreporter::make_subchunk(g, subchunk_name = "scrna_sample_fraction_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Count Stats per HTO Barcode

Read Counts by HTO Category and Barcode

# Reads per hto cat
stm("Generating sample read count violin plots")
category_reads_violins <- qc_violin_plot(rna_hto_meta,
                                         category_x = "hto_category",
                                         name_x = "HTO Category",
                                         column_y = "n_reads",
                                         name_y = "N Reads per Cell",
                                         fill = "dodgerblue")

# Reads per hto plot
g_read <- qc_violin_plot(hto_meta_singlet,
                        category_x = "sample_hto_pool",
                        name_x = "HTO/Sample (singlets)",
                        column_y = "n_reads",
                        name_y = "N Reads per Cell, Singlets",
                        log_y = TRUE,
                        fill = "dodgerblue") +
  ggtitle("Reads per Cell")
# g_read

reads_violin_list <- list(category_reads_violins, 
                          g_read)
g_grid <- plot_grid(plotlist = reads_violin_list,
          ncol = 2, rel_widths = c(1, 1+n_samples/4),
          nrow = 1, align = "h")
          
                    
temp_figwidth = 3 + n_samples*0.4
temp_figheight = 4

batchreporter::make_subchunk(g_grid, subchunk_name = "scrna_sample_counts_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Expand plots of reads/cell distributions by hto and well

stm("Plotting read count violin plots per hto and well")  

all_sample_hto_pool <- unique(sprintf("%s\n%s%s",df_key$HashTag,df_key$SampleID,  df_key$PoolID))

plot_list <- list()
for(i in seq_along(all_sample_hto_pool)){
    temp_meta <- filter(hto_meta_singlet, sample_hto_pool == all_sample_hto_pool[i])
    g <- qc_violin_plot(temp_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_reads",
                        name_y = "N Reads per Cell",
                        fill = "dodgerblue") +
      theme(text = element_text(size =8)) +
      ggtitle(all_sample_hto_pool[i])
    plot_list[[i]] <- g
}

g_grid <- cowplot::plot_grid(plotlist = plot_list, align = "tblr", ncol = 6)

temp_figwidth <- 15
temp_figheight <- ceiling(n_samples/6)*4 + 0.4
batchreporter::make_subchunk(g_grid, subchunk_name = "reads_well_hto_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

UMI Counts by HTO Category and Barcode

# UMI per category plot
stm("Generating sample umi count violin plots")
category_umis_violins <- qc_violin_plot(rna_hto_meta,
                                         category_x = "hto_category",
                                         name_x = "HTO Category",
                                         column_y = "n_umis",
                                         name_y = "N UMIs per Cell",
                                         fill = "purple")


# UMI per cell plot
g_umi <- qc_violin_plot(hto_meta_singlet,
                        category_x = "sample_hto_pool",
                        name_x = "HTO/Sample (singlets)",
                        column_y = "n_umis",
                        name_y = "N UMIs per Cell, Singlets",
                        fill = "purple") +
  ggtitle("UMIs per Cell")
# g_umi


umis_violin_list <- list(category_umis_violins, 
                          g_umi)
g_grid <- plot_grid(plotlist = umis_violin_list,
          ncol = 2, 
          # rel_widths = c(1, 4),
          rel_widths = c(1, 1+n_samples/4),
          nrow = 1, align = "h")
          
temp_figwidth = 3 + n_samples*0.4
temp_figheight = 4

batchreporter::make_subchunk(g_grid, subchunk_name = "scrna_sample_umi_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Expand plots of UMIs/cell distributions per per well by hto

stm("Plotting umi count violin plots per hto and well")  

all_sample_hto_pool <- unique(sprintf("%s\n%s%s",df_key$HashTag,df_key$SampleID,  df_key$PoolID))

plot_list <- list()
for(i in seq_along(all_sample_hto_pool)){
    
    temp_meta <- filter(hto_meta_singlet, sample_hto_pool == all_sample_hto_pool[i])
    g <- qc_violin_plot(temp_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_umis",
                        name_y = "N UMIs per Cell",
                        fill = "purple") +
      theme(text = element_text(size =8)) +
      ggtitle(all_sample_hto_pool[i])
    plot_list[[i]] <- g
}

g_grid <- cowplot::plot_grid(plotlist = plot_list, align = "tblr", ncol = 6)

temp_figwidth <- 15
temp_figheight <- ceiling(n_samples/6)*4 + 0.4
batchreporter::make_subchunk(g_grid, subchunk_name = "umis_well_hto_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Gene Counts by HTO Category and Barcode

# Genes per category plot
category_genes_violins <- qc_violin_plot(rna_hto_meta,
                                         category_x = "hto_category",
                                         name_x = "HTO Category",
                                         column_y = "n_genes",
                                         name_y = "N Genes per Cell",
                                         fill = "orangered")  

# Genes per cell plot
g_genes <- qc_violin_plot(hto_meta_singlet,
                        category_x = "sample_hto_pool",
                        name_x = "HTO/Sample (singlets)",
                        column_y = "n_genes",
                        name_y = "N Genes per Cell, Singlets",
                        fill = "orangered") +
  ggtitle("Genes per Cell")


genes_violin_list <- list(category_genes_violins, 
                          g_genes)
g_grid <- plot_grid(plotlist = genes_violin_list,
          ncol = 2, 
          rel_widths = c(1, 1+n_samples/4),
          # rel_widths = c(1, 4),
          nrow = 1, align = "h")
          
temp_figwidth = 3 + n_samples*0.4
temp_figheight = 4

batchreporter::make_subchunk(g_grid, subchunk_name = "scrna_sample_genes_violin_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, 
                                    warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Expand plots of genes/cell distributions per per well by hto

stm("Plotting gene count violin plots per hto and well")  

all_sample_hto_pool <- unique(sprintf("%s\n%s%s",df_key$HashTag,df_key$SampleID,  df_key$PoolID))

plot_list <- list()
for(i in seq_along(all_sample_hto_pool)){
    
    temp_meta <- filter(hto_meta_singlet, sample_hto_pool == all_sample_hto_pool[i])
    g <- qc_violin_plot(temp_meta,
                        category_x = "well_id",
                        name_x = "Well",
                        column_y = "n_genes",
                        name_y = "N Genes per Cell",
                        fill = "orangered") +
      theme(text = element_text(size =8)) +
      ggtitle(all_sample_hto_pool[i])
    plot_list[[i]] <- g
}

g_grid <- cowplot::plot_grid(plotlist = plot_list, align = "tblr", ncol = 6)

temp_figwidth <- 15
temp_figheight <- ceiling(n_samples/6)*4 + 0.4
batchreporter::make_subchunk(g_grid, subchunk_name = "genes_well_hto_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Mitochondrial UMIs

Fraction Mitochondrial UMIs

g_bar <- ggplot(hto_meta_singlet, aes(!!as.name(sample_column_name),  
                                      fill = factor(fct_mito_group, 
                                                    levels = rev(fct_mito_grp_labels)))) +
  geom_bar() +
  labs(fill = "Fraction Mitochondrial UMIs") +
  ylab("N Cells") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

g_pl <- plotly::ggplotly(g_bar) %>%
  layout(hovermode = 'compare')

tempwidth <- n_samples*0.4 + 2

make_subchunk(g_pl, subchunk_name = "scrna_sample_fraction_mito_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = tempwidth, fig.height = 6, echo = FALSE))

Fraction Mito UMIs by UMI Counts

# Reads per hto cat
stm("Generating mt umi vs umi count scatter plots by well")

# Reads per hto plot
g_mito_umi <- ggplot(hto_meta_singlet, aes(n_umis, fct_mito_umi)) +
    geom_point(color = "purple", alpha = 0.2) +
    xlab("UMI per Cell (log10 scale)") +
    ylab("Fraction Mitochondrial UMI") +
    scale_x_log10() +
    ggtitle("Fraction Mitochondrial UMI vs UMI per Cell") 

n_samples_plot <- length(unique(hto_meta_singlet[[sample_column_name]]))
ncols_plot <- 6
nrows_plot <- ceiling(n_samples_plot/ncols_plot)

# Reads per hto plot
g_mito_umi_sample <- g_mito_umi +
    facet_wrap(as.formula(paste0("~", sample_column_name)), 
               ncol = ncols_plot)

temp_figwidth <- 2*min(n_samples_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3

make_subchunk(g_mito_umi_sample, subchunk_name = "scrna_sample_fraction_mito_umis_sample_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))

Expand Per-Well Plot

# Reads per hto cat
stm("Generating mt umi vs umi count scatter plots per hto and well")

all_sample_hto_pool <- unique(sprintf("%s\n%s%s",df_key$HashTag,df_key$SampleID,  df_key$PoolID))

plot_list <- list()
for(i in seq_along(all_sample_hto_pool)){
    temp_meta <- hto_meta_singlet %>%
        dplyr::filter(sample_hto_pool == all_sample_hto_pool[i])
    g <- ggplot(temp_meta, aes(n_umis, fct_mito_umi)) +
      geom_point(color = "purple", alpha = 0.2) +
      xlab("UMI per Cell (log10 scale)") +
      ylab("Fraction Mitochondrial UMI") +
      scale_x_log10() +
      facet_grid( ~ well_id ) +
      ggtitle(all_sample_hto_pool[i])
    plot_list[[i]] <- g
}

g_grid <- cowplot::plot_grid(plotlist = plot_list, align = "tblr", ncol = 1)

temp_figwidth <- 15
temp_figheight <- n_samples*4 + 0.4
batchreporter::make_subchunk(g_grid, subchunk_name = "fctmito_umiscatter_well_hto_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Fraction Mito UMIs by Gene Counts

# Genes per hto cat
stm("Generating mt umi vs gene count scatter plots by well")

# Genes per hto plot
g_mito_gene <- ggplot(hto_meta_singlet, aes(n_genes, fct_mito_umi)) +
    geom_point(color = "orangered", alpha = 0.2) +
    xlab("Genes per Cell (log10 scale)") +
    ylab("Fraction Mitochondrial UMI") +
    scale_x_log10() +
    ggtitle("Fraction Mitochondrial UMI vs Genes per Cell") 

n_samples_plot <- length(unique(hto_meta_singlet[[sample_column_name]]))
ncols_plot <- 6
nrows_plot <- ceiling(n_samples_plot/ncols_plot)

# Reads per hto plot
g_mito_genes_sample <- g_mito_gene +
    facet_wrap(as.formula(paste0("~", sample_column_name)), 
               ncol = ncols_plot)

temp_figwidth <- 2*min(n_samples_plot, ncols_plot)+0.3
temp_figheight <- 2*nrows_plot + 0.3

make_subchunk(g_mito_genes_sample, subchunk_name = "scrna_sample_fraction_mito_genes_sample_subchunk", quiet_knit = T,
              chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight, echo = FALSE))

Expand Per-Well Plot

# Reads per hto cat
stm("Generating mt umi vs gene count scatter plots per hto and well")

all_sample_hto_pool <- unique(sprintf("%s\n%s%s",df_key$HashTag,df_key$SampleID,  df_key$PoolID))

plot_list <- list()
for(i in seq_along(all_sample_hto_pool)){
    temp_meta <- hto_meta_singlet %>%
        dplyr::filter(sample_hto_pool == all_sample_hto_pool[i])
    g <- ggplot(temp_meta, aes(n_genes, fct_mito_umi)) +
      geom_point(color = "orangered", alpha = 0.2) +
      xlab("UMI per Cell (log10 scale)") +
      ylab("Fraction Mitochondrial UMI") +
      scale_x_log10() +
      facet_grid( ~ well_id ) +
      ggtitle(all_sample_hto_pool[i])
    plot_list[[i]] <- g
}

g_grid <- cowplot::plot_grid(plotlist = plot_list, align = "tblr", ncol = 1)

temp_figwidth <- 15
temp_figheight <- n_samples*4 + 0.4
batchreporter::make_subchunk(g_grid, subchunk_name = "fctmito_genescatter_well_hto_subchunk", 
              chunk_opt_list = list(fig.height = temp_figheight, fig.width = temp_figwidth, warning = FALSE), 
              quiet_knit = TRUE)
(function () {    g})()

Return to Contents

Data Quality UMAP

Expand Code

# Create Seurat object
stm("Creating Seurat object from merged data for hashed RNA umap")
merged_so <- Seurat::CreateSeuratObject(counts = rna_counts_sampled)

# Normalize data
stm("Normalizing data for hashed RNA umap")
merged_so <- Seurat::NormalizeData(object = merged_so,
                                   normalization.method = "LogNormalize",
                                   scale.factor = 10000,
                                   margin = 1)

normCounts <- merged_so[["RNA"]]@data
pc_dims <- min(ncol(merged_so) - 1, 50)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))

stm("Finding Variable Features for hashed RNA umap")
merged_so <- Seurat::FindVariableFeatures(object = merged_so, )

stm("Scaling Data for hashed RNA umap")
merged_so <- Seurat::ScaleData(object = merged_so, verbose = FALSE)

stm("Running PCA for hashed RNA umap")
merged_so <- Seurat::RunPCA(object = merged_so, npcs = pc_dims, verbose = FALSE)
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))

stm("Determining dimensionality via jackstraw method for hashed RNA umap")

labels_order <- hto_meta_singlet[,get(sample_column_name)][match(colnames(merged_so),hto_meta_singlet$barcodes)]
names(labels_order) <- colnames(merged_so)

jackstraw_cells <- sample_cells(labels_order, 100, seed = 3030)

jackstraw_so <- merged_so[,jackstraw_cells]

jackstraw_so <- Seurat::JackStraw(object = jackstraw_so,
                         dims = pc_dims,
                         num.replicate = 50,  
                         verbose = FALSE)
jackstraw_so <- Seurat::ScoreJackStraw(object = jackstraw_so,
                              dims = 1:pc_dims)

pc_pvals <- jackstraw_so@reductions$pca@jackstraw$overall.p.values[,2]
good_pcs <- pc_pvals < 0.05

nPC <- sum(good_pcs)

pc_var <- Stdev(merged_so, reduction = "pca")^2
total_var <- merged_so@reductions$pca@misc$total.variance
var_selected_pc <- sum(pc_var[good_pcs])/total_var
cumvar_string <- sprintf(fmt = "%.1f", var_selected_pc*100)

stm(sprintf("Selected %s significant pcs via JackStraw, %s%% explained variation", nPC, cumvar_string))

pc_embeddings <- merged_so@reductions$pca@cell.embeddings

# stm(sprintf("Using %s principal components for umap",nPC))
# Run UMAP
# suppressWarnings(future::plan("multiprocess", workers = avail_workers))
stm(sprintf("Running UMAP on selected coordinates for hashed RNA umap"))

merged_so <- Seurat::RunUMAP(merged_so,
                                   dims = c(1:50)[good_pcs],
                                   umap.method = "uwot",
                                   seed.use = 3,
                                   verbose = FALSE)
# Get UMAP coordinates
umapDF <- merged_so[["umap"]]@cell.embeddings %>%
          as.data.frame() %>%
          dplyr::rename(UMAP_1_merged = UMAP_1, UMAP_2_merged = UMAP_2) %>%
          tibble::rownames_to_column(var = "barcodes")
umapDF <- merge(umapDF, hto_meta_singlet, by = "barcodes")
rownames(umapDF) <- umapDF$barcodes

umapDF <- umapDF[rownames(merged_so@"meta.data"), , drop = F]

normCounts <- merged_so[["RNA"]]@data
if(!all(rownames(umapDF) == colnames(normCounts))){
  stop("Merged UMAP dataframe does not match columns in normalized counts for hashed RNA UMAP")
}

Batch-level UMAP using 32 principal components (21.2% explained variance) selected by jackstraw. Singlet cells only, 2000 randomly sampled cells per Sample ID.

stm("Plotting Batch UMAP by Data Quality Metrics for hashed RNA")

# Gene scaling
max_genes <- max(8000, hto_meta_singlet$n_genes)
max_umi <- max(60000, hto_meta_singlet$n_umis)
point_size <- 0.2


# Cell Types
g_base <- ggplot(umapDF, aes(UMAP_1_merged, UMAP_2_merged))

# fraction mitochondrial umi
stm("Plotting Fraction Mito UMAP")
g3 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "Fraction Mitochondrial UMIs",
                 point_size = point_size,
                 color_col = "fct_mito_umi",
                 scale_color_fun = scale_color_fct_mito)

# N Genes
stm("Plotting N Genes UMAP")
g4 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "N Genes",
                 point_size = point_size,
                 color_col = "n_genes",
                 scale_color_fun = scale_color_genes(max_genes)
)

# N UMIs
stm("Plotting N UMIs UMAP")
g5 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "N UMIs",
                 point_size = point_size,
                 color_col = "n_umis",
                 scale_color_fun = scale_color_umis(max_umi)
)


# Wells
stm("Plotting Well UMAP")
cols_well <- H5weaver::varibow(n_colors = length(unique(umapDF$well_id)))

scale_color_well <- function(...){
    scale_color_manual(values = cols_well, ...)
}
g6 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "Well ID",
                 point_size = point_size,
                 color_col = "well_id",
                 scale_color_fun = scale_color_well
) +
  guides(colour = guide_legend(override.aes = list(size = 3)))

# Sample
stm("Plotting Sample ID UMAP")
cols_sample <- H5weaver::varibow(n_colors = length(unique(umapDF[,sample_column_name])))
scale_color_sample <- function(...){
    scale_color_manual(values = cols_sample, ...)
}

g7 <- plot_umap_report(df = umapDF,
                 x_col="UMAP_1_merged",
                 x_lab = "UMAP 1",
                 y_col = "UMAP_2_merged",
                 y_lab = "UMAP 2",
                 title = "Sample ID",
                 point_size = point_size,
                 color_col = sample_column_name,
                 scale_color_fun = scale_color_sample
) +
  guides(colour = guide_legend(override.aes = list(size = 3)))

aligned_plots <- cowplot::align_plots(g3, g4, g5, g6, g7, align = "hv", axis = "tblr")  # uniform plot sizing

cowplot::plot_grid(aligned_plots[[1]],
                   aligned_plots[[2]],
                   aligned_plots[[3]],
                   aligned_plots[[4]],
                   aligned_plots[[5]],
                   ncol = 2)

Return to Contents


scRNA seq report sample module v.1.0.0, Lauren Okada

ADT Well

Session Preparation

Expand output

Load Libraries
quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(ggplot2)
quiet_library(Seurat)
quiet_library(viridis)
quiet_library(dplyr)
quiet_library(plyr)
quiet_library(data.table)
Locate input files
stm("Identifying files for ADT analysis")

# Input directory in_adt should be defined in the parent Rmarkdown report.
if(is.null(in_adt)) {
  warning("No input directory for adt batch report, defaulting to test data")
  in_adt <- system.file("extdata/X070/adt", package = "batchreporter")
} 

adt_count_files <- list.files(in_adt,
                              pattern = "Tag_Counts.csv")

well_names <- gsub('.{15}$', '', adt_count_files)

adt_count_filepaths <- list.files(in_adt, pattern = "Tag_Counts.csv", full.names = T)
adt_pos_count_filepaths <- list.files(in_adt, pattern = "positive_tag_counts.csv", full.names = T)
adt_pos_count_files <- basename(adt_pos_count_filepaths)

cat(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_files, collapse = "\n\t")), sep = "\n")
## IN ADT Tag Count files: 
##  X070-EP1C1W1_Tag_Counts.csv
##  X070-EP1C1W2_Tag_Counts.csv
##  X070-EP1C1W3_Tag_Counts.csv
stm(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_files, collapse = "\n\t")))  

cat(sprintf("IN POSITIVE ADT Tag Count files: \n\t%s", paste(adt_pos_count_files, collapse = "\n\t")), sep = "\n")
## IN POSITIVE ADT Tag Count files: 
##  X070-EP1C1W1_adt_positive_tag_counts.csv
##  X070-EP1C1W2_adt_positive_tag_counts.csv
##  X070-EP1C1W3_adt_positive_tag_counts.csv
stm(sprintf("IN POSITIVE ADT Tag Count files: \n\t%s", paste(adt_pos_count_files, collapse = "\n\t")))
Load Inputs

Expand output

Create pos & neg droplet matrices, then create seurat objects

Merge Seurat Objects

# all_merge <- Reduce(merge,seurat_list)

merge_seurat <- function(so_list){
  if (length(so_list) > 2){
    temp <- merge(x = so_list[[1]], y = so_list[[2]])
    i = 3
    while (i < length(so_list) + 1){
      temp <- merge(x = temp, y = so_list[[i]])
      i = i + 1
    }
    return(temp)
  }
  else{
    temp <- merge(x = so_list[[1]], y = so_list[[2]])
  }
  return(temp)
}

all_merge <- merge_seurat(seurat_list)

Split Violin Plot

Return to Contents

UMAP clustering for ADTs

## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 53132
## Number of edges: 1375642
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9449
## Number of communities: 45
## Elapsed time: 6 seconds

UMAP clustering ADTs by Well
DimPlot(adt_positives, group.by = 'well')

Return to Contents

n_features <- length(rownames(adt_positives))
height = ceiling((n_features / 3) * 2)

UMAP clustering for ADTs individually

Return to Contents

ADT UMIs Dataframe construction - start here

i = 1
colsums_list <- list()
while (i < length(count_list) + 1){
  temp_adt_so <- seurat_list[[i]]
  temp_adt_so <- SetIdent(temp_adt_so, value = 'orig.ident')
  temp_adt_positives <- subset(temp_adt_so, idents = 'Cells')
  temp_adt_pos_mtx <- t(data.frame(temp_adt_positives@assays$RNA@counts))
  colsums_df <- data.frame(colSums(temp_adt_pos_mtx))
  colnames(colsums_df) <- 'Count'
  colsums_df$ADT <- rownames(colsums_df)
  colsums_df$Well <- rep(well_names[[i]], length(rownames(colsums_df)))
  colsums_list[[i]] <- colsums_df
  i = i + 1
}

combined_colsums <- bind_rows(colsums_list, .id = "column_label")

ADT UMIs plot

ggplot(combined_colsums, aes(x = reorder(ADT, -Count), y = Count, color = Well)) + geom_point(size = 5) + 
  geom_hline(yintercept=median(combined_colsums$Count), linetype="dashed", color = "blue") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + scale_y_log10() 

ggplot(combined_colsums, aes(x = reorder(ADT, -Count), y = Count, color = Well)) + geom_point(size = 5) + 
  geom_hline(yintercept=median(combined_colsums$Count), linetype="dashed", color = "blue") + 
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))  

Return to Contents


ADT report module v.1.0.0, Zach Thomson

ADT Sample

Session Preparation

Load Libraries
quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(ggplot2)
quiet_library(Seurat)
quiet_library(viridis)
quiet_library(dplyr)
quiet_library(plyr)
quiet_library(data.table)
Locate input files
# Input directories in_adt and in_hto should be defined in the parent Rmarkdown report.

# For testing purposes
if(is.null(in_adt)) {
  warning("No input directory for adt batch report, defaulting to test data")
  in_adt <- system.file("extdata/X070/adt", package = "batchreporter")
} 
if(is.null(in_hto)) {
  warning("No input directory for hto batch report, defaulting to test data")
  in_hto <- system.file("extdata/X070/hto", package = "batchreporter")
} 

#create lists for future mapply functions
adt_count_filepaths <- list.files(in_adt, pattern = "tag_counts.csv", full.names = T)

hto_category_filepaths <- list.files(in_hto, pattern = "hto_category_table.csv.gz", full.names = T)
# Check input files
if(length(adt_count_filepaths) == 0) {
  stop("Can't find tag_counts.csv files for hashed ADT module processing. Check input 'adt' subdirectory for *tag_counts.csv files.")
}
if(length(hto_category_filepaths) == 0) {
  stop("Can't find hto_category_table.csv.gz files for hashed ADT module processing. Check input 'hto' subdirectory for *hto_category_table.csv.gz files.")
}


cat(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_filepaths, collapse = "\n\t")), sep = "\n")
## IN ADT Tag Count files: 
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/adt/X070-EP1C1W1_adt_positive_tag_counts.csv
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/adt/X070-EP1C1W2_adt_positive_tag_counts.csv
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/adt/X070-EP1C1W3_adt_positive_tag_counts.csv
stm(sprintf("IN ADT Tag Count files: \n\t%s", paste(adt_count_filepaths, collapse = "\n\t")))  

cat(sprintf("IN HTO Category files: \n\t%s", paste(hto_category_filepaths, collapse = "\n\t")), sep = "\n")
## IN HTO Category files: 
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W1_hto_category_table.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W2_hto_category_table.csv.gz
##  /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto/X070-P1C1W3_hto_category_table.csv.gz
stm(sprintf("IN HTO Category files: \n\t%s", paste(hto_category_filepaths, collapse = "\n\t")))
Load Inputs

Expand output

Subset singlets from ADT count matrix using HTO category table & combine into single DF

Violin Plot

violin_plot <- ggplot(combined_df, aes(x = pbmc_sample_id, y = total, fill=pbmc_sample_id)) + 
  geom_violin() + 
  scale_y_log10() + 
  ggtitle('ADT UMIs by Sample')
violin_plot + 
  stat_summary(fun=median, geom = "point", color="black") + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + 
  geom_hline(yintercept=1000, linetype="dashed", color = "blue") + 
  geom_hline(yintercept=300, linetype="solid", color = "red")

Return to Contents


Hashed ADT batch module v.1.0.0, Zach Thomson

scATAC Well

Data Processing

Session Preparation
Load libraries:
start_time_atac <- Sys.time()

quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(data.table)
quiet_library(H5weaver)
quiet_library(ggplot2)
quiet_library(cowplot)
quiet_library(jsonlite)
quiet_library(purrr)
options(stringsAsFactors = FALSE)

Declaring start

stm("Starting scATAC Batch Report module")
Argument parsing
if(is.null(in_atac)) {
  in_atac <- system.file("testdata/batch_qc", package = "ATAComb")
  batch_id <- "X055"
  out_dir <- tempdir()
} 

stm(paste0("IN  results dir      : ", in_atac))
stm(paste0("IN  BatchID          : ", batch_id))
stm(paste0("OUT Directory        : ", out_dir))
Input Parameters
print(c(
  paste0("IN  results dir      : ", in_atac),
  paste0("IN  BatchID          : ", batch_id),
  paste0("OUT H5 directory     : ", out_dir)
))
## [1] "IN  results dir      : /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac"
## [2] "IN  BatchID          : X070"                                                                                            
## [3] "OUT H5 directory     : /var/folders/zc/cdsbwh5d60d_2h9c9brv90s80000gp/T//Rtmpo06rqu"
Check Input Files
if(!dir.exists(in_atac)) {
  stm(paste("ERROR: Cannot find IN results dir:", in_atac))
  stop()
}
if(!dir.exists(out_dir)) {
  stm(paste("Creating output directory:", out_dir))
  dir.create(out_dir)
}
out_prefix <- file.path(out_dir, paste0(batch_id, "_"))
Check available files

Unfiltered metadata

meta_files <- list.files(in_atac, 
                         pattern = "_all_metadata.csv.gz$",
                         full.names = TRUE)
if(length(meta_files) == 0) {
  stop("Can't find unfiltered metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Full Metadata Files:")
for(meta_file in meta_files) {
  stm(meta_file)
  print(meta_file)
}
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W1_all_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W2_all_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W3_all_metadata.csv.gz"
meta_list <- map(meta_files, fread)
sample_names <- sub(".+/","",sub("_all_metadata.csv.gz","",meta_files))
names(meta_list) <- sample_names

Filtered metadata

filt_meta_files <- list.files(in_atac, 
                         pattern = "_filtered_metadata.csv.gz$",
                         full.names = TRUE)

if(length(filt_meta_files) < length(meta_files)) {
  stop("Can't find filtered metadata files. Check input directory for *_filtered_metadata.csv.gz files.")
} else if(length(filt_meta_files) > length(meta_files)) {
  stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Filtered Metadata Files:")
for(filt_meta_file in filt_meta_files) {
  stm(filt_meta_file)
  print(filt_meta_file)
}
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W1_filtered_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W2_filtered_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W3_filtered_metadata.csv.gz"
filt_meta_list <- map(filt_meta_files, fread)
names(filt_meta_list) <- sub(".+/","",sub("_filtered_metadata.csv.gz","",filt_meta_files))
filt_meta_list <- filt_meta_list[sample_names]

Saturation projections

sat_files <- list.files(in_atac,
                        pattern = "_saturation_projection.csv.gz$",
                        full.names = TRUE)

if(length(sat_files) < length(meta_files)) {
  stop("Can't find all saturation files. Check input directory for *_saturation_projection.csv.gz files.")
} else if(length(sat_files) > length(meta_files)) {
  stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Saturation Projection Files:")
for(sat_file in sat_files) {
  stm(sat_file)
  print(sat_file)
}
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W1_saturation_projection.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W2_saturation_projection.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W3_saturation_projection.csv.gz"
names(sat_files) <- sub(".+/","",sub("_saturation_projection.csv.gz","",sat_files))
sat_files <- sat_files[sample_names]

sat_list <- map(sat_files, fread)

Fragment widths

width_files <- list.files(in_atac,
                          pattern = "_fragment_width_summary.csv.gz",
                          full.names = TRUE)

if(length(width_files) < length(meta_files)) {
  stop("Can't find all fragment width files. Check input directory for *_fragment_width_summary.csv.gz files.")
} else if(length(sat_files) > length(meta_files)) {
  stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Fragment Width Summary Files:")
for(width_file in width_files) {
  stm(width_file)
  print(width_file)
}
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W1_fragment_width_summary.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W2_fragment_width_summary.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W3_fragment_width_summary.csv.gz"
names(width_files) <- sub(".+/","",sub("_fragment_width_summary.csv.gz","",width_files))
width_files <- width_files[sample_names]

width_list <- map(width_files, fread)

Return to Contents

Combine metadata
filtered_meta <- do.call(rbind, filt_meta_list)
meta <- do.call(rbind, meta_list)

cutoffs <- list(altius_frac = 0.5,
                tss_frac = 0.2,
                peaks_frac = 0.2)

meta$pass_fail <- "pass"
for(i in seq_along(cutoffs)) {
  cut_name <- names(cutoffs)[i]
  cut_val <- cutoffs[[i]]
  cut_logic <- meta[[cut_name]] <= cut_val
  meta$pass_fail[cut_logic] <- "fail"
}

meta$filtered <- meta$barcodes %in% filtered_meta$barcodes
meta$mito_frac <- meta$n_mito / meta$n_fragments
meta$well_label <- paste0(meta$well_id, "\n", meta$pbmc_sample_id)

well_samples <- unique(meta[,c("well_id","pbmc_sample_id","well_label")])
Filter metadata based on cutoffs
stm("Filtering based on QC cutoffs")

meta <- meta %>%
  dplyr::left_join(dplyr::select(filtered_meta, barcodes, DoubletScore, DoubletEnrichment, TSSEnrichment), by = "barcodes")

filtered_meta <- meta
for(i in seq_along(cutoffs)) {
  cut_name <- names(cutoffs)[i]
  cut_val <- cutoffs[[i]]
  filtered_meta <- filtered_meta[filtered_meta[[cut_name]] > cut_val]
  filtered_meta <- filtered_meta[filtered_meta$filtered,]
}

filtered_meta$well_label <- paste0(filtered_meta$well_id, "\n", filtered_meta$pbmc_sample_id)

well_filtered_meta_list <- split(filtered_meta, filtered_meta$well_id)

Set up global metadata for reporting

meta$barcode_category <- "fail_qc"
meta$barcode_category[!meta$filtered & meta$pass_fail == "pass"] <- "pass_doublet"
meta$barcode_category[meta$filtered & meta$pass_fail == "pass"] <- "pass_singlet"

Return to Contents

QC Stats

qc_list <- list(report_type = "atac_batch_qc",
                report_datetime = as.character(start_time_atac),
                report_uuid = ids::uuid(use_time = TRUE),
                package = "ATAComb",
                package_version = sessionInfo()$otherPkgs$ATAComb$Version,
                batch_id = sub("_.+","",sample_names[1]))

out_json <- paste0(out_prefix, "atac_batch_qc_metrics.json")

Return to Contents

Barcode QC Stats
barcode_counts <- meta[,.(n_barcodes = nrow(.SD),
                            n_pass_qc = sum(.SD$pass_fail == "pass"),
                            n_fail_qc = sum(.SD$pass_fail == "fail"),
                            percent_fail = round(sum(.SD$pass_fail == "fail")/nrow(.SD)*100,2),
                            pass_singlets = sum(.SD$barcode_category == "pass_singlet"),
                            pass_doublets = sum(.SD$barcode_category == "pass_doublet"),
                            percent_doublets = round(sum(.SD$barcode_category == "pass_doublet")/sum(.SD$pass_fail == "pass")*100,2)),
                         .(well_id,pbmc_sample_id)]

qc_list$barcode_stats <- as.list(barcode_counts)

qc_table(barcode_counts)
qc_stacked_barplot(meta,
                   category_x = "well_label",
                   name_x = "Well ID",
                   category_y = "barcode_category",
                   category_name = "Barcode Category",
                   as_fraction = TRUE)

qc_aligned_barplot(meta,
                   category_x = "well_label",
                   name_x = "Well ID",
                   category_y = "barcode_category",
                   category_name = "Barcode Category")

qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "DoubletEnrichment",
                                     name_y = "Doublet Enrichment",
                                     log_y = FALSE,
                                     fill = "dodgerblue")

qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "DoubletScore",
                                     name_y = "Doublet Score",
                                     log_y = FALSE,
                                     fill = "dodgerblue")

qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "TSSEnrichment",
                                     name_y = "TSS Enrichment",
                                     log_y = FALSE,
                                     fill = "dodgerblue")

Return to Contents

Fragment QC stats

Plot Settings

n_grid_columns <- min(length(well_filtered_meta_list),4)
n_grid_rows <- ceiling(length(well_filtered_meta_list)/4)

grid_width <- n_grid_columns * 3
grid_height <- n_grid_rows * 3
fragment_stats <- filtered_meta[,.(n_singlets = nrow(.SD[.SD$filtered]),
                                   med_raw_frag = round(median(n_fragments),0),
                                   med_raw_perc_mito = round(median(mito_frac)*100,4),
                                   med_unique_frag = round(median(n_unique),0),
                                   med_unique_fritss = round(median(tss_frac),4),
                                   med_unique_frip = round(median(peaks_frac),4),
                                   med_unique_encode = round(median(altius_frac),4)
                                   ),
                                .(well_id,pbmc_sample_id)]

qc_list$fragment_stats <- as.list(fragment_stats)

qc_table(fragment_stats)

Return to Contents

Unique Fragments per Cell
category_reads_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "n_unique",
                                         name_y = "Unique Fragments",
                                         fill = "dodgerblue")
well_reads_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "n_unique",
                                     name_y = "Unique Fragments (Singlets)",
                                     fill = "dodgerblue")

reads_violin_list <- list(category_reads_violins, 
                          well_reads_violins)

plot_grid(plotlist = reads_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Return to Contents

Fraction of Raw Reads in Mitochondria per Cell
category_mito_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "mito_frac",
                                         name_y = "Fraction Mitochondrial",
                                         fill = "darkgreen",
                                        log_y = FALSE)
well_mito_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "mito_frac",
                                     name_y = "Fraction Mito. (Singlets)",
                                     fill = "darkgreen",
                                    log_y = FALSE)

mito_violin_list <- list(category_mito_violins, 
                          well_mito_violins)

plot_grid(plotlist = mito_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Return to Contents

Fraction of Reads in Peaks per Cell
category_frip_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "peaks_frac",
                                         name_y = "FRIP",
                                         fill = "orangered",
                                        log_y = FALSE)
well_frip_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "peaks_frac",
                                     name_y = "FRIP (Singlets)",
                                     fill = "orangered",
                                    log_y = FALSE)

frip_violin_list <- list(category_frip_violins, 
                          well_frip_violins)

plot_grid(plotlist = frip_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Reads vs peaks_frac scatter
qc_scatter_list <- map(well_filtered_meta_list,
                       function(well_meta) {
                         qc_scatter_plot(well_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "peaks_frac",
                                         name_y = "Frac Fragments in Peaks (peaks_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "orangered") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$peaks_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(well_meta$well_label[1])
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Fraction of Reads in TSS (+/- 2kb) per Cell
category_fritss_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "tss_frac",
                                         name_y = "FRITSS",
                                         fill = "mediumorchid3",
                                        log_y = FALSE)
well_fritss_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "tss_frac",
                                     name_y = "FRITSS (Singlets)",
                                     fill = "mediumorchid3",
                                    log_y = FALSE)

fritss_violin_list <- list(category_fritss_violins, 
                          well_fritss_violins)

plot_grid(plotlist = fritss_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Reads vs tss_frac scatter
qc_scatter_list <- map(well_filtered_meta_list,
                       function(well_meta) {
                         qc_scatter_plot(well_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "tss_frac",
                                         name_y = "Frac Fragments in TSS (tss_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "mediumorchid3") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$tss_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(well_meta$well_label[1])
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Fraction of Reads in ENCODE/Altius Index per Cell
category_enc_violins <- qc_violin_plot(meta,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "altius_frac",
                                         name_y = "FRIENCODE",
                                         fill = "darkred",
                                        log_y = FALSE)
well_enc_violins <- qc_violin_plot(filtered_meta,
                                     category_x = "well_label",
                                     name_x = "Well ID",
                                     column_y = "altius_frac",
                                     name_y = "FRIENCODE (Singlets)",
                                     fill = "darkred",
                                    log_y = FALSE)

enc_violin_list <- list(category_enc_violins, 
                          well_enc_violins)

plot_grid(plotlist = enc_violin_list,
          ncol = 2, rel_widths = c(1, 3),
          nrow = 1, align = "h")

Reads vs altius_frac scatter
qc_scatter_list <-map(well_filtered_meta_list,
                       function(well_meta) {
                         qc_scatter_plot(well_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "altius_frac",
                                         name_y = "Frac Fragments in Altius (altius_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "darkred") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$altius_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(well_meta$well_label[1])
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Saturation metrics
all_sat <- map_dfr(1:length(sat_list),
                   function(x) {
                     sat <- sat_list[[x]]
                     sat_pbmc_sample <- sub(paste0(batch_id,"_"),"",names(sat_list)[x])
                     sat$pbmc_sample_id <- sat_pbmc_sample
                     sat$well_id <- well_samples$well_id[well_samples$pbmc_sample_id == sat_pbmc_sample]
                     sat$well_label <- well_samples$well_label[well_samples$pbmc_sample_id == sat_pbmc_sample]
                     sat
                   })
Saturation cutoffs
all_sat <- as.data.table(all_sat)
sat_cutoffs <- all_sat[,.(M_raw = max(.SD$M_raw_reads[.SD$type == "actual"]),
                          M_umis = max(.SD$M_umis[.SD$type == "actual"]),
                          M_signal = max(.SD$M_signal_umis[.SD$type == "actual"]),
                          M_raw_2x_UMIs = .SD$M_raw_reads[which.min(abs(.SD$ratio - 2))[1]],
                          M_raw_4x_UMIs = .SD$M_raw_reads[which.min(abs(.SD$ratio - 4))[1]],
                          M_raw_2x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 2))[1]],
                          M_raw_4x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 4))[1]],
                          M_raw_8x_signal = .SD$M_raw_reads[which.min(abs(.SD$signal_ratio - 8))[1]]),
                       .(well_id,pbmc_sample_id)]

qc_list$saturation_stats <- as.list(sat_cutoffs)

qc_table(sat_cutoffs)
Saturation of Unique Fragments
reference_projections <- fread(system.file("reference/saturation/reference_projections.csv",
                                           package = "ATAComb"))
reference_projections$dataset <- paste0("ref_",reference_projections$dataset)

umi_saturation_plot <- ggplot() +
  geom_line(data = reference_projections,
            aes(x = n_raw_reads,
                y = expected_umis,
                group = dataset,
                color = dataset)) +
  geom_line(data = all_sat,
            aes(x = n_raw_reads,
                y = expected_umis,
                group = well_label,
                color = type),
            size = 2)  +
  scale_color_brewer("Data Type",
                     type = "qual",
                     palette = 2) +
  scale_x_continuous("N Raw Reads (millions)",
                     expand = c(0,0),
                     limits = c(0, 1.5e9),
                     breaks = seq(0, 1.5e9, by = 2.5e8),
                     labels = seq(0, 1500, by = 250)) +
  scale_y_continuous("N Unique Fragments (millions)", 
                     expand = c(0,0),
                     limits = c(0, 5e8),
                     breaks = seq(0, 5e8, by = 1e8),
                     labels = seq(0, 500, by = 100)) +
  theme_bw() +
  theme(panel.grid.minor = element_blank()) +
  facet_wrap(facets = vars(well_label), ncol = 4) +
  ggtitle("Saturation of All Unique Fragments")

umi_saturation_plot

Saturation of Signal Fragments

Signal fragments are reads in Altius Index regions in barcodes passing QC

signal_saturation_plot <- ggplot() +
  geom_line(data = reference_projections,
            aes(x = n_raw_reads,
                y = signal_umis,
                group = dataset,
                color = dataset)) +
  geom_line(data = all_sat,
            aes(x = n_raw_reads,
                y = signal_umis,
                group = well_label,
                color = type),
            size = 2)  +
  scale_color_brewer("Data Type",
                     type = "qual",
                     palette = 2) +
  scale_x_continuous("N Raw Reads (millions)",
                     expand = c(0,0),
                     limits = c(0, 1.5e9),
                     breaks = seq(0, 1.5e9, by = 2.5e8),
                     labels = seq(0, 1500, by = 250)) +
  scale_y_continuous("N Signal Fragments (millions)", 
                     expand = c(0,0),
                     limits = c(0, 2e8),
                     breaks = seq(0, 2e8, by = 2e7),
                     labels = seq(0, 200, by = 20)) +
  theme_bw() +
  theme(panel.grid.minor = element_blank()) +
  facet_wrap(facets = vars(well_label), ncol = 4) +
  ggtitle("Saturation of Signal Fragments")

signal_saturation_plot

Return to Contents

Plot Settings

n_grid_columns <- min(length(well_filtered_meta_list),2)
n_grid_rows <- ceiling(length(well_filtered_meta_list)/2)

grid_width <- n_grid_columns * 4
grid_height <- n_grid_rows * 2
Fragment Width Distributions
frag_widths <- map_dfr(1:length(width_list),
                   function(x) {
                     fw <- width_list[[x]]
                     fw_pbmc_sample <- sub(paste0(batch_id,"_"),"",names(width_list)[x])
                     fw$pbmc_sample_id <- fw_pbmc_sample
                     fw$well_id <- well_samples$well_id[well_samples$pbmc_sample_id == fw_pbmc_sample]
                     fw$well_label <- well_samples$well_label[well_samples$pbmc_sample_id == fw_pbmc_sample]
                     fw
                   })

frag_widths <- frag_widths[width <= 750]

ggplot(data = frag_widths) +
  geom_line(aes(x = width,
                y = frac,
                group = pass_fail,
                color = pass_fail),
            size = 1) +
  xlim(0, 750) +
  facet_wrap(vars(well_label), ncol = 2) +
  theme_bw()

Return to Contents

Write QC JSON

stm(paste0("Writing JSON to ",out_json))

qc_list_json <- jsonlite::toJSON(qc_list,
                                 auto_unbox = TRUE,
                                 pretty = TRUE)

writeLines(qc_list_json,
           out_json)

Return to Contents

Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6          viridis_0.5.1       viridisLite_0.3.0  
##  [4] Seurat_3.9.9.9010   future.apply_1.6.0  future_1.19.1      
##  [7] purrr_0.3.4         jsonlite_1.7.1      tidyr_1.1.2        
## [10] plotly_4.9.3        gt_0.2.2            cowplot_1.1.0      
## [13] dplyr_1.0.4         ggplot2_3.3.2       HTOparser_1.0.12   
## [16] H5weaver_1.2.0      data.table_1.13.2   rhdf5_2.32.4       
## [19] Matrix_1.2-18       batchreporter_1.1.0 optparse_1.6.6     
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15           colorspace_1.4-1     deldir_0.1-29       
##   [4] ellipsis_0.3.1       ggridges_0.5.2       spatstat.data_2.1-0 
##   [7] farver_2.0.3         leiden_0.3.3         listenv_0.8.0       
##  [10] DT_0.16              getopt_1.20.3        ggrepel_0.8.2       
##  [13] RSpectra_0.16-0      R.methodsS3_1.8.1    codetools_0.2-16    
##  [16] splines_4.0.3        knitr_1.30           polyclip_1.10-0     
##  [19] ica_1.0-2            cluster_2.1.0        R.oo_1.24.0         
##  [22] png_0.1-7            uwot_0.1.9.9000      shiny_1.5.0         
##  [25] sctransform_0.3.1    compiler_4.0.3       httr_1.4.2          
##  [28] backports_1.1.10     assertthat_0.2.1     fastmap_1.0.1       
##  [31] lazyeval_0.2.2       later_1.1.0.1        htmltools_0.5.1.1   
##  [34] tools_4.0.3          rsvd_1.0.3           igraph_1.2.6        
##  [37] gtable_0.3.0         glue_1.4.2           RANN_2.6.1          
##  [40] reshape2_1.4.4       Rcpp_1.0.5           spatstat_1.64-1     
##  [43] vctrs_0.3.6          nlme_3.1-149         crosstalk_1.1.0.1   
##  [46] lmtest_0.9-38        xfun_0.18            stringr_1.4.0       
##  [49] globals_0.13.1       mime_0.9             miniUI_0.1.1.1      
##  [52] lifecycle_0.2.0      irlba_2.3.3          goftest_1.2-2       
##  [55] ids_1.0.1            MASS_7.3-53          zoo_1.8-8           
##  [58] scales_1.1.1         promises_1.1.1       spatstat.utils_2.1-0
##  [61] parallel_4.0.3       RColorBrewer_1.1-2   yaml_2.2.1          
##  [64] reticulate_1.16      pbapply_1.4-3        gridExtra_2.3       
##  [67] sass_0.3.1           rpart_4.1-15         stringi_1.5.3       
##  [70] checkmate_2.0.0      commonmark_1.7       rlang_0.4.10        
##  [73] pkgconfig_2.0.3      matrixStats_0.57.0   evaluate_0.14       
##  [76] lattice_0.20-41      ROCR_1.0-11          tensor_1.5          
##  [79] Rhdf5lib_1.10.1      labeling_0.4.2       patchwork_1.0.1     
##  [82] htmlwidgets_1.5.3    tidyselect_1.1.0     RcppAnnoy_0.0.16    
##  [85] magrittr_1.5         R6_2.4.1             generics_0.0.2      
##  [88] DBI_1.1.1            mgcv_1.8-33          pillar_1.4.6        
##  [91] withr_2.3.0          fitdistrplus_1.1-1   survival_3.2-7      
##  [94] abind_1.4-5          tibble_3.0.4         crayon_1.3.4        
##  [97] uuid_0.1-4           KernSmooth_2.23-17   rmarkdown_2.4       
## [100] grid_4.0.3           digest_0.6.26        xtable_1.8-4        
## [103] httpuv_1.5.4         R.utils_2.10.1       munsell_0.5.0

Total time elapsed

end_time <- Sys.time()
diff_time <- end_time - start_time_atac
time_message <- paste0("Elapsed Time: ", 
                       round(diff_time, 3),
                       " ", units(diff_time))
print(time_message)
## [1] "Elapsed Time: 16.516 secs"
stm(time_message)
stm("10x ATAC Batch QC Report complete.")

Return to Contents


scATAC report module 1.0.0, Lucas Graybuck

scATAC Sample

Data Processing

Session Preparation

Load libraries:
start_time_atac_hash <- Sys.time()

quiet_library <- function(...) {
  suppressPackageStartupMessages(library(...))
}
quiet_library(data.table)
quiet_library(H5weaver)
quiet_library(ggplot2)
quiet_library(cowplot)
quiet_library(jsonlite)
quiet_library(purrr)
options(stringsAsFactors = FALSE)

Declaring start

stm("Starting scATAC Cell Hashing Report Module")
Argument parsing
if(is.null(in_atac)) {
  in_atac <- system.file("testdata/batch_qc", package = "ATAComb")
  batch_id <- "X055"
  out_dir <- tempdir()
} 

stm(paste0("IN  ATAC results dir : ", in_atac))
stm(paste0("IN  HTO dir          : ", in_hto))
stm(paste0("IN  BatchID          : ", batch_id))
stm(paste0("OUT Directory        : ", out_dir))
Input Parameters
print(c(
  paste0("IN  ATAC results dir : ", in_atac),
  paste0("IN  HTO dir          : ", in_hto),
  paste0("IN  BatchID          : ", batch_id),
  paste0("OUT Directory        : ", out_dir)
))
## [1] "IN  ATAC results dir : /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac"
## [2] "IN  HTO dir          : /Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/hto"   
## [3] "IN  BatchID          : X070"                                                                                            
## [4] "OUT Directory        : /var/folders/zc/cdsbwh5d60d_2h9c9brv90s80000gp/T//Rtmpo06rqu"
Check Input Files
if(!dir.exists(in_atac)) {
  stm(paste("ERROR: Cannot find IN results dir:", in_atac))
  stop()
}  
if(!dir.exists(in_hto)) {
  stm(paste("ERROR: Cannot find IN hto dir:", in_hto))
  stop()
}
if(!dir.exists(out_dir)) {
  stm(paste("Creating output directory:", out_dir))
  dir.create(out_dir)
}
out_prefix <- file.path(out_dir, paste0(batch_id, "_"))
Check available files

Unfiltered metadata

meta_files <- list.files(in_atac, 
                         pattern = "_all_metadata.csv.gz$",
                         full.names = TRUE)
if(length(meta_files) == 0) {
  stop("Can't find unfiltered metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Full Metadata Files:")
for(meta_file in meta_files) {
  stm(meta_file)
  print(meta_file)
}
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W1_all_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W2_all_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W3_all_metadata.csv.gz"
meta_list <- map(meta_files, fread)
sample_names <- sub(".+/","",sub("_all_metadata.csv.gz","",meta_files))
names(meta_list) <- sample_names

Filtered metadata

filt_meta_files <- list.files(in_atac, 
                         pattern = "_filtered_metadata.csv.gz$",
                         full.names = TRUE)

if(length(filt_meta_files) < length(meta_files)) {
  stop("Can't find filtered metadata files. Check input directory for *_filtered_metadata.csv.gz files.")
} else if(length(filt_meta_files) > length(meta_files)) {
  stop("Can't find all metadata files. Check input directory for *_all_metadata.csv.gz files.")
}

stm("IN Filtered Metadata Files:")
for(filt_meta_file in filt_meta_files) {
  stm(filt_meta_file)
  print(filt_meta_file)
}
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W1_filtered_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W2_filtered_metadata.csv.gz"
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070/scatac/X070_X070-MP1C1W3_filtered_metadata.csv.gz"
filt_meta_list <- map(filt_meta_files, fread)
names(filt_meta_list) <- sub(".+/","",sub("_filtered_metadata.csv.gz","",filt_meta_files))
filt_meta_list <- filt_meta_list[sample_names]

HTO metadata

Return to Contents

Combine scATAC metadata

filtered_meta <- do.call(rbind, filt_meta_list)
meta <- do.call(rbind, meta_list)

cutoffs <- list(altius_frac = 0.5,
                tss_frac = 0.2,
                peaks_frac = 0.2)

meta$pass_fail <- "pass"
for(i in seq_along(cutoffs)) {
  cut_name <- names(cutoffs)[i]
  cut_val <- cutoffs[[i]]
  cut_logic <- meta[[cut_name]] <= cut_val
  meta$pass_fail[cut_logic] <- "fail"
}

meta$filtered <- meta$barcodes %in% filtered_meta$barcodes
meta$mito_frac <- meta$n_mito / meta$n_fragments
meta$well_label <- paste0(meta$well_id, "\n", meta$pbmc_sample_id)

well_samples <- unique(meta[,c("well_id","pbmc_sample_id","well_label")])
Filter metadata based on cutoffs
stm("Filtering based on QC cutoffs")

meta <- meta %>%
  dplyr::left_join(dplyr::select(filtered_meta, barcodes, DoubletScore, DoubletEnrichment, TSSEnrichment), by = "barcodes")

filtered_meta <- meta
for(i in seq_along(cutoffs)) {
  cut_name <- names(cutoffs)[i]
  cut_val <- cutoffs[[i]]
  filtered_meta <- filtered_meta[filtered_meta[[cut_name]] > cut_val]
  filtered_meta <- filtered_meta[filtered_meta$filtered,]
}

filtered_meta$well_label <- paste0(filtered_meta$well_id, "\n", filtered_meta$pbmc_sample_id)

well_filtered_meta_list <- split(filtered_meta, filtered_meta$well_id)

Set up global metadata for reporting

meta$barcode_category <- "fail_qc"
meta$barcode_category[!meta$filtered & meta$pass_fail == "pass"] <- "pass_doublet"
meta$barcode_category[meta$filtered & meta$pass_fail == "pass"] <- "pass_singlet"

Return to Contents

Format HTO metadata
stm("Formatting HTO meta data") 

hto_meta_list <- lapply(all_hto_meta, fread)
hto_meta_wells <- gsub("_.*","",basename(all_hto_meta))
hto_meta_list <- mapply(function(x,y){x$well_id <- y; x}, hto_meta_list, hto_meta_wells, SIMPLIFY = F)
hto_meta <- do.call(rbind, hto_meta_list)
rm("hto_meta_list")

hto_cat_levels <- c("no_hash", "singlet", "doublet", "multiplet")
hto_meta[in_hto_key, on = 'hto_barcode', hto_name := i.hto_name]  # merge in the hto names
hto_meta[ , pool_id := gsub("C\\dW\\d","", well_id)]
hto_meta[ , sample_hto:= sprintf("%s\n%s", hto_name, get(sample_column_name))]
hto_meta[ , sample_hto_pool:= sprintf("%s\n%s%s", hto_name, get(sample_column_name), pool_id)]
hto_meta[ , hto_order:=  as.numeric(gsub("HT","", hto_name))]
hto_meta[ , sample_hto_pool:=  factor(sample_hto_pool, levels = unique(sample_hto_pool[order(pool_id, hto_order)]))]
hto_meta[ , hto_order:= NULL]
hto_meta[ , hto_category:= factor(hto_category, levels = hto_cat_levels)]

# well id to merge in hto info with atac data
hto_meta[ , atac_well_id := gsub("-P","-AP", well_id)]
hto_meta[ , atac_original_barcodes := paste0(cell_barcode, "-1")]

hto_meta_singlet <- subset(hto_meta, hto_category=="singlet")

Return to Contents

Merge HTO and ATAC metadata
stm("Merging hto data with ATAC metadata") 

# Join in hto metadata for cells in atac dataset, rename some atac variables to avoid confusion
meta_hto <- meta %>%
    dplyr::rename(pbmc_sample_id_old = pbmc_sample_id) %>%
    dplyr::rename(atac_well_id = well_id) %>%
    dplyr::mutate(atac_pool_id = gsub("C.*", "", atac_well_id)) %>%
    dplyr::left_join(hto_meta, by = c("original_barcodes"="atac_original_barcodes", "atac_well_id")) %>%
    dplyr::mutate(pool_id=ifelse(is.na(pool_id), gsub("-A","-", atac_pool_id),pool_id))

meta_singlet <- meta_hto %>%
    filter(hto_category == "singlet")

# Full Join of hto and atac cells to compare existence between the two datasets
meta_hto_full <- meta %>%
    dplyr::rename(pbmc_sample_id_old = pbmc_sample_id) %>%
    dplyr::rename(atac_well_id = well_id) %>%
    dplyr::mutate(atac_pool_id = gsub("C.*", "", atac_well_id)) %>%
    dplyr::full_join(hto_meta, by = c("original_barcodes"="atac_original_barcodes", "atac_well_id")) %>%
    dplyr::mutate(hto_category = factor(hto_category, levels = c(hto_cat_levels, "hto_missing"))) %>%
    dplyr::mutate(hto_category = replace_na(hto_category, replace="hto_missing")) %>%
    dplyr::mutate(barcode_category = replace_na(barcode_category, replace="atac_missing")) %>%
    dplyr::mutate(pool_id=ifelse(is.na(pool_id), gsub("-A","-", atac_pool_id),pool_id))

Return to Contents

Merge HTO and Filtered ATAC metadata
stm("Merging hto data with filtered ATAC metadata") 

# Join in hto metadata for cells in atac dataset, rename some atac variables to avoid confusion
filtered_meta_hto <- filtered_meta %>%
    dplyr::rename(pbmc_sample_id_old = pbmc_sample_id) %>%
    dplyr::rename(atac_well_id = well_id) %>%
    dplyr::left_join(hto_meta, by = c("original_barcodes"="atac_original_barcodes", "atac_well_id")) 

filtered_meta_singlet <- filtered_meta_hto %>%
    dplyr::filter(hto_category == "singlet")  

well_filtered_meta_singlet_list <- split(filtered_meta_singlet, filtered_meta_singlet$well_id)
sample_filtered_meta_singlet_list <- split(filtered_meta_singlet, filtered_meta_singlet$pbmc_sample_id)

Return to Contents

QC Metrics: All Cells

qc_list <- list(report_type = "atac_batch_qc",
                report_datetime = as.character(start_time_atac_hash),
                report_uuid = ids::uuid(use_time = TRUE),
                package = "batchreporter",
                package_version = sessionInfo()$otherPkgs$batchreporter$Version,
                batch_id = sub("_.+","",sample_names[1]))

out_json <- paste0(out_prefix, "atac_hashing_batch_qc_metrics.json")

ATAC vs HTO QC Categories

Counts in “n_removed_atac” are cells in the HTO library that were filtered out in ATAC preprocessing for failing to meet minimal quality thresholds. Counts in “hto_missing” are cells in the ATAC library that were not detected in the HTO library.

stm("Generating HTO/ATAC QC Category contingency table") 

barcode_counts_htocat <- meta_hto_full[,.(n_barcodes = nrow(.SD),
                            n_pass_qc = sum(.SD$pass_fail == "pass", na.rm = T),
                            n_fail_qc = sum(.SD$pass_fail == "fail", na.rm = T),
                            n_removed_atac = sum(is.na(.SD$pass_fail)),
                            percent_fail = round(sum(.SD$pass_fail == "fail", na.rm=T)/sum(!is.na(.SD$pass_fail))*100,2),
                            pass_singlets = sum(.SD$barcode_category == "pass_singlet", na.rm=T),
                            pass_doublets = sum(.SD$barcode_category == "pass_doublet", na.rm=T),
                            percent_doublets = round(sum(.SD$barcode_category == "pass_doublet", na.rm=T)/sum(.SD$pass_fail == "pass", na.rm=T)*100,2)),
                         .(atac_well_id, hto_category)]

qc_list$barcode_stats_allhtoatac <- as.list(barcode_counts_htocat)

qc_table(barcode_counts_htocat)
stm("Plotting HTO/ATAC QC Category barplots") 
g1 <- qc_aligned_barplot_facet(meta_hto_full, category_x = "barcode_category", category_name = "HTO Category", 
                   category_y = "hto_category", name_y = "N Cells", 
                   name_x = "ATAC Barcode Category",colorset_y = "varibow", facet_formula = as.formula("~pool_id"))  
g2 <- qc_aligned_barplot_facet(meta_hto_full, category_x = "hto_category", category_name = "ATAC Barcode Category", 
                   category_y = "barcode_category", name_y = "N Cells", 
                   name_x = "HTO Category",colorset_y = "varibow", facet_formula = as.formula("~pool_id"))

npool <- length(unique(meta_hto_full$pool_id))
temp_figwidth <- 6*npool
temp_figheight <- 4

make_subchunk(g1, subchunk_name = "subchunk_htocat_ataccat",chunk_opt_list = list(fig.width = temp_figwidth, fig.height = temp_figheight))
(function () {    g})()

make_subchunk(g2, subchunk_name = "subchunk_ataccat_htocat",chunk_opt_list = list(fig.width = temp_figwidth, fig.height=temp_figheight))
(function () {    g})()

# cowplot::plot_grid(g1, g2, align = "hv", axis = "tblr")

Return to Contents

ATAC vs HTO Doublet Comparison

stm("Plotting ATAC Doublet Enrichment by HTO Categorys") 
gbox1 <- ggplot(meta_hto, aes(hto_category, DoubletEnrichment)) +
    geom_point(position = position_jitter(height = 0, width = 0.3), alpha = 0.1, aes(color = hto_category))+
    geom_boxplot(alpha = 0, outlier.alpha = 1)  +
    facet_wrap(~pool_id)

    # geom_violin(alpha = 0) + 

gbox2 <- ggplot(meta_hto, aes(hto_category, DoubletScore)) +
    geom_point(position = position_jitter(height = 0, width = 0.3), alpha = 0.1, aes(color = hto_category))+
    geom_boxplot(alpha = 0, outlier.alpha = 1)  +
    facet_wrap(~pool_id)
    # geom_violin(alpha = 0) + 

cowplot::plot_grid(gbox1, gbox2, align = "hv")

QC Metrics: Singlet Cells

QC stats for singlet cells as identified by hashing.

Sample QC Metrics

barcode_counts <- meta_singlet[,.(n_barcodes = nrow(.SD),
                            n_pass_qc = sum(.SD$pass_fail == "pass"),
                            n_fail_qc = sum(.SD$pass_fail == "fail"),
                            percent_fail = round(sum(.SD$pass_fail == "fail")/nrow(.SD)*100,2),
                            pass_singlets = sum(.SD$barcode_category == "pass_singlet"),
                            pass_doublets = sum(.SD$barcode_category == "pass_doublet"),
                            percent_doublets = round(sum(.SD$barcode_category == "pass_doublet")/sum(.SD$pass_fail == "pass")*100,2)),
                         .(pool_id, pbmc_sample_id)]

qc_list$barcode_stats <- as.list(barcode_counts)

qc_table(barcode_counts)
qc_stacked_barplot(meta_singlet,
                   category_x = "sample_hto_pool",
                   name_x = "Sample",
                   category_y = "barcode_category",
                   category_name = "Barcode Category",
                   as_fraction = TRUE) +
  xlab("Fraction Cells")

qc_aligned_barplot_facet(meta_singlet,
                   category_x = "sample_hto_pool",
                   name_x = "Sample",
                   category_y = "barcode_category",
                   category_name = "Barcode Category", facet_formula = as.formula("~pool_id"))

qc_violin_plot(filtered_meta_singlet,
               category_x = "sample_hto_pool",
               name_x = "Sample",
               column_y = "DoubletEnrichment",
               name_y = "Doublet Enrichment",
               log_y = FALSE,
               fill = "dodgerblue")

qc_violin_plot(filtered_meta_singlet,
               category_x = "sample_hto_pool",
               name_x = "Sample",
               column_y = "DoubletScore",
               name_y = "Doublet Score",
               log_y = FALSE,
               fill = "dodgerblue")

qc_violin_plot(filtered_meta_singlet,
               category_x = "sample_hto_pool",
               name_x = "Sample",
               column_y = "TSSEnrichment",
               name_y = "TSS Enrichment",
               log_y = FALSE,
               fill = "dodgerblue")

Return to Contents

Fragment QC Metrics

Fragment QC stats for all singlet cells as determined by HTO category.

Plot Settings

n_grid_columns <- min(length(sample_filtered_meta_singlet_list),4)
n_grid_rows <- ceiling(length(sample_filtered_meta_singlet_list)/4)

grid_width <- n_grid_columns * 3
grid_height <- n_grid_rows * 3
fragment_stats <- filtered_meta_singlet[,.(n_singlets = nrow(.SD[.SD$filtered]),
                                   med_raw_frag = round(median(n_fragments),0),
                                   med_raw_perc_mito = round(median(mito_frac)*100,4),
                                   med_unique_frag = round(median(n_unique),0),
                                   med_unique_fritss = round(median(tss_frac),4),
                                   med_unique_frip = round(median(peaks_frac),4),
                                   med_unique_encode = round(median(altius_frac),4)
                                   ),
                                .(pool_id, pbmc_sample_id)]

qc_list$fragment_stats <- as.list(fragment_stats)

qc_table(fragment_stats)

Return to Contents

Unique Fragments per Cell

# violins for all atac data by atac category
category_reads_violins <- qc_violin_plot(meta_hto,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "n_unique",
                                         name_y = "Unique Fragments",
                                         fill = "dodgerblue")
# violins for all atac data by hto category
hto_reads_violins <- qc_violin_plot(meta_hto,
                                     category_x = "hto_category",
                                     name_x = "HTO Category",
                                     column_y = "n_unique",
                                     name_y = "Unique Fragments",
                                     fill = "dodgerblue")
well_reads_violins <- qc_violin_plot(filtered_meta_singlet,
                                     category_x = "sample_hto_pool",
                                     name_x = "Sample",
                                     column_y = "n_unique",
                                     name_y = "Unique Fragments (Singlets)",
                                     fill = "dodgerblue")

reads_violin_list <- list(category_reads_violins, 
                          hto_reads_violins,
                          well_reads_violins)

plot_grid(plotlist = reads_violin_list,
          ncol = 3, rel_widths = c(1, 1, 3), 
          nrow = 1, align = "h")

Return to Contents

Fraction of Raw Reads in Mitochondria per Cell

category_mito_violins <- qc_violin_plot(meta_hto,
                                        category_x = "barcode_category",
                                        name_x = "Barcode Type",
                                        column_y = "mito_frac",
                                        name_y = "Fraction Mitochondrial",
                                        fill = "darkgreen",
                                        log_y = F) 

hto_mito_violins <- qc_violin_plot(meta_hto,
                                    category_x = "hto_category",
                                    name_x = "HTO Category",
                                    column_y = "mito_frac",
                                    name_y = "Fraction Mitochondrial",
                                    fill = "darkgreen",
                                    log_y = F)

sample_mito_violins <- qc_violin_plot(filtered_meta_singlet,
                                     category_x = "sample_hto_pool",
                                     name_x = "Sample",
                                     column_y = "mito_frac",
                                     name_y = "Fraction Mito. (Singlets)",
                                     fill = "darkgreen",
                                     log_y = F)

mito_violin_list <- list(category_mito_violins, 
                         hto_mito_violins,
                          sample_mito_violins)


plot_grid(plotlist = mito_violin_list,
          ncol = 3, rel_widths = c(1, 1, 3), 
          nrow = 1, align = "h")

Return to Contents

Fraction of Reads in Peaks per Cell

category_frip_violins <- qc_violin_plot(meta_hto,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "peaks_frac",
                                         name_y = "FRIP",
                                         fill = "orangered",
                                        log_y = FALSE)
hto_frip_violins <- qc_violin_plot(meta_hto,
                                         category_x = "hto_category",
                                         name_x = "HTO Category",
                                         column_y = "peaks_frac",
                                         name_y = "FRIP",
                                         fill = "orangered",
                                        log_y = FALSE)
sample_frip_violins <- qc_violin_plot(filtered_meta_singlet,
                                     category_x = "sample_hto_pool",
                                     name_x = "Sample",
                                     column_y = "peaks_frac",
                                     name_y = "FRIP (Singlets)",
                                     fill = "orangered",
                                    log_y = FALSE)

frip_violin_list <- list(category_frip_violins, 
                         hto_frip_violins,
                         sample_frip_violins)

plot_grid(plotlist = frip_violin_list,
          ncol = 3, rel_widths = c(1, 1, 3),
          nrow = 1, align = "h")

Reads vs peaks_frac scatter

qc_scatter_list <- map(sample_filtered_meta_singlet_list,
                       function(sample_meta) {
                         qc_scatter_plot(sample_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "peaks_frac",
                                         name_y = "Frac Fragments in Peaks (peaks_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "orangered") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$peaks_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(sprintf("%s %s", sample_meta$pool_id[1], sample_meta$pbmc_sample_id[1]))
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Fraction of Reads in TSS (+/- 2kb) per Cell

category_fritss_violins <- qc_violin_plot(meta_hto,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "tss_frac",
                                         name_y = "FRITSS",
                                         fill = "mediumorchid3",
                                        log_y = FALSE)  
hto_fritss_violins <- qc_violin_plot(meta_hto,
                                         category_x = "hto_category",
                                         name_x = "HTO Category",
                                         column_y = "tss_frac",
                                         name_y = "FRITSS",
                                         fill = "mediumorchid3",
                                        log_y = FALSE)
well_fritss_violins <- qc_violin_plot(filtered_meta_singlet,
                                     category_x = "sample_hto_pool",
                                     name_x = "Sample",
                                     column_y = "tss_frac",
                                     name_y = "FRITSS (Singlets)",
                                     fill = "mediumorchid3",
                                    log_y = FALSE)

fritss_violin_list <- list(category_fritss_violins, 
                           hto_fritss_violins,
                           well_fritss_violins)

plot_grid(plotlist = fritss_violin_list,
          ncol = 3, rel_widths = c(1, 1, 3),
          nrow = 1, align = "h")

Reads vs tss_frac scatter

qc_scatter_list <- map(sample_filtered_meta_singlet_list,
                       function(sample_meta) {
                         qc_scatter_plot(sample_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "tss_frac",
                                         name_y = "Frac Fragments in TSS (tss_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "mediumorchid3") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$tss_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(sprintf("%s %s", sample_meta$pool_id[1],sample_meta$pbmc_sample_id[1]))
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Fraction of Reads in ENCODE/Altius Index per Cell

category_enc_violins <- qc_violin_plot(meta_hto,
                                         category_x = "barcode_category",
                                         name_x = "Barcode Type",
                                         column_y = "altius_frac",
                                         name_y = "FRIENCODE",
                                         fill = "darkred",
                                        log_y = FALSE)
hto_enc_violins <- qc_violin_plot(meta_hto,
                                         category_x = "hto_category",
                                         name_x = "HTO Category",
                                         column_y = "altius_frac",
                                         name_y = "FRIENCODE",
                                         fill = "darkred",
                                        log_y = FALSE)
well_enc_violins <- qc_violin_plot(filtered_meta_singlet,
                                     category_x = "pbmc_sample_id",
                                     name_x = "Sample ID",
                                     column_y = "altius_frac",
                                     name_y = "FRIENCODE (Singlets)",
                                     fill = "darkred",
                                    log_y = FALSE)

enc_violin_list <- list(category_enc_violins, 
                        hto_enc_violins,
                        well_enc_violins)

plot_grid(plotlist = enc_violin_list,
          ncol = 3, rel_widths = c(1, 1, 3),
          nrow = 1, align = "h")

Reads vs altius_frac scatter

qc_scatter_list <-map(sample_filtered_meta_singlet_list,
                       function(sample_meta) {
                         qc_scatter_plot(sample_meta,
                                         column_x = "n_unique",
                                         name_x = "N Unique Fragments per Cell",
                                         column_y = "altius_frac",
                                         name_y = "Frac Fragments in Altius (altius_frac)",
                                         log_x = TRUE, log_y = FALSE, frac_y = TRUE,
                                         show_targets = FALSE,
                                         color = "darkred") +
                           geom_vline(aes(xintercept = 2.5e3), linetype = "dashed", size = 0.2) +
                           geom_hline(aes(yintercept = cutoffs$altius_frac), linetype = "dashed", size = 0.2) +
                           ggtitle(sprintf("%s %s", sample_meta$pool_id[1],sample_meta$pbmc_sample_id[1]))
                       })

plot_grid(plotlist = qc_scatter_list,
          ncol = n_grid_columns,
          nrow = n_grid_rows)

Return to Contents

Write QC JSON

stm(paste0("Writing JSON to ",out_json))

qc_list_json <- jsonlite::toJSON(qc_list,
                                 auto_unbox = TRUE,
                                 pretty = TRUE)

writeLines(qc_list_json,
           out_json)

Return to Contents

Session Information

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6          viridis_0.5.1       viridisLite_0.3.0  
##  [4] Seurat_3.9.9.9010   future.apply_1.6.0  future_1.19.1      
##  [7] purrr_0.3.4         jsonlite_1.7.1      tidyr_1.1.2        
## [10] plotly_4.9.3        gt_0.2.2            cowplot_1.1.0      
## [13] dplyr_1.0.4         ggplot2_3.3.2       HTOparser_1.0.12   
## [16] H5weaver_1.2.0      data.table_1.13.2   rhdf5_2.32.4       
## [19] Matrix_1.2-18       batchreporter_1.1.0 optparse_1.6.6     
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15           colorspace_1.4-1     deldir_0.1-29       
##   [4] ellipsis_0.3.1       ggridges_0.5.2       spatstat.data_2.1-0 
##   [7] farver_2.0.3         leiden_0.3.3         listenv_0.8.0       
##  [10] DT_0.16              getopt_1.20.3        ggrepel_0.8.2       
##  [13] RSpectra_0.16-0      R.methodsS3_1.8.1    codetools_0.2-16    
##  [16] splines_4.0.3        knitr_1.30           polyclip_1.10-0     
##  [19] ica_1.0-2            cluster_2.1.0        R.oo_1.24.0         
##  [22] png_0.1-7            uwot_0.1.9.9000      shiny_1.5.0         
##  [25] sctransform_0.3.1    compiler_4.0.3       httr_1.4.2          
##  [28] backports_1.1.10     assertthat_0.2.1     fastmap_1.0.1       
##  [31] lazyeval_0.2.2       later_1.1.0.1        htmltools_0.5.1.1   
##  [34] tools_4.0.3          rsvd_1.0.3           igraph_1.2.6        
##  [37] gtable_0.3.0         glue_1.4.2           RANN_2.6.1          
##  [40] reshape2_1.4.4       Rcpp_1.0.5           spatstat_1.64-1     
##  [43] vctrs_0.3.6          nlme_3.1-149         crosstalk_1.1.0.1   
##  [46] lmtest_0.9-38        xfun_0.18            stringr_1.4.0       
##  [49] globals_0.13.1       mime_0.9             miniUI_0.1.1.1      
##  [52] lifecycle_0.2.0      irlba_2.3.3          goftest_1.2-2       
##  [55] ids_1.0.1            MASS_7.3-53          zoo_1.8-8           
##  [58] scales_1.1.1         promises_1.1.1       spatstat.utils_2.1-0
##  [61] parallel_4.0.3       RColorBrewer_1.1-2   yaml_2.2.1          
##  [64] reticulate_1.16      pbapply_1.4-3        gridExtra_2.3       
##  [67] sass_0.3.1           rpart_4.1-15         stringi_1.5.3       
##  [70] checkmate_2.0.0      commonmark_1.7       rlang_0.4.10        
##  [73] pkgconfig_2.0.3      matrixStats_0.57.0   evaluate_0.14       
##  [76] lattice_0.20-41      ROCR_1.0-11          tensor_1.5          
##  [79] Rhdf5lib_1.10.1      labeling_0.4.2       patchwork_1.0.1     
##  [82] htmlwidgets_1.5.3    tidyselect_1.1.0     RcppAnnoy_0.0.16    
##  [85] magrittr_1.5         R6_2.4.1             generics_0.0.2      
##  [88] DBI_1.1.1            mgcv_1.8-33          pillar_1.4.6        
##  [91] withr_2.3.0          fitdistrplus_1.1-1   survival_3.2-7      
##  [94] abind_1.4-5          tibble_3.0.4         crayon_1.3.4        
##  [97] uuid_0.1-4           KernSmooth_2.23-17   rmarkdown_2.4       
## [100] grid_4.0.3           digest_0.6.26        xtable_1.8-4        
## [103] httpuv_1.5.4         R.utils_2.10.1       munsell_0.5.0

Total time elapsed

end_time <- Sys.time()
diff_time <- end_time - start_time_atac_hash
time_message <- paste0("Elapsed Time: ", 
                       round(diff_time, 3),
                       " ", units(diff_time))
print(time_message)
## [1] "Elapsed Time: 25.469 secs"
stm(time_message)
stm("10x ATAC Batch QC Report complete.")

Return to Contents


Hashed scATAC batch QC module 1.0.0, Lucas Graybuck, Lauren Okada

Session Information

Input Directory:

in_dir 
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/X070"

Input Directory Contents:

folders <- list.dirs(in_dir, recursive = FALSE)

file_list <- lapply(folders, function(x){
  dir(x, recursive = TRUE)
})
names(file_list) <- basename(folders)
file_list
## $adt
## [1] "X070-EP1C1W1_adt_positive_tag_counts.csv"
## [2] "X070-EP1C1W1_Tag_Counts.csv"             
## [3] "X070-EP1C1W2_adt_positive_tag_counts.csv"
## [4] "X070-EP1C1W2_Tag_Counts.csv"             
## [5] "X070-EP1C1W3_adt_positive_tag_counts.csv"
## [6] "X070-EP1C1W3_Tag_Counts.csv"             
## 
## $hto
## [1] "X070-P1C1W1_hto_category_table.csv.gz"  
## [2] "X070-P1C1W1_hto_count_matrix.csv.gz"    
## [3] "X070-P1C1W1_hto_processing_metrics.json"
## [4] "X070-P1C1W2_hto_category_table.csv.gz"  
## [5] "X070-P1C1W2_hto_count_matrix.csv.gz"    
## [6] "X070-P1C1W2_hto_processing_metrics.json"
## [7] "X070-P1C1W3_hto_category_table.csv.gz"  
## [8] "X070-P1C1W3_hto_count_matrix.csv.gz"    
## [9] "X070-P1C1W3_hto_processing_metrics.json"
## 
## $scatac
##  [1] "X070_X070-MP1C1W1_all_metadata.csv.gz"          
##  [2] "X070_X070-MP1C1W1_filtered_metadata.csv.gz"     
##  [3] "X070_X070-MP1C1W1_fragment_width_summary.csv.gz"
##  [4] "X070_X070-MP1C1W1_saturation_projection.csv.gz" 
##  [5] "X070_X070-MP1C1W2_all_metadata.csv.gz"          
##  [6] "X070_X070-MP1C1W2_filtered_metadata.csv.gz"     
##  [7] "X070_X070-MP1C1W2_fragment_width_summary.csv.gz"
##  [8] "X070_X070-MP1C1W2_saturation_projection.csv.gz" 
##  [9] "X070_X070-MP1C1W3_all_metadata.csv.gz"          
## [10] "X070_X070-MP1C1W3_filtered_metadata.csv.gz"     
## [11] "X070_X070-MP1C1W3_fragment_width_summary.csv.gz"
## [12] "X070_X070-MP1C1W3_saturation_projection.csv.gz" 
## 
## $scrna
## [1] "X070-P1C1W1.h5" "X070-P1C1W2.h5" "X070-P1C1W3.h5"

Batch processing metadata:

in_batch_meta 
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/batch-metadata.json"

Config File:

in_config
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/default_rna_config_v1.csv"

Key File:

in_key
## [1] "/Library/Frameworks/R.framework/Versions/4.0/Resources/library/batchreporter/extdata/example_sample_key_X070.csv"

Output Directory:

out_dir
## [1] "/var/folders/zc/cdsbwh5d60d_2h9c9brv90s80000gp/T//Rtmpo06rqu"

Session Info:

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plyr_1.8.6          viridis_0.5.1       viridisLite_0.3.0  
##  [4] Seurat_3.9.9.9010   future.apply_1.6.0  future_1.19.1      
##  [7] purrr_0.3.4         jsonlite_1.7.1      tidyr_1.1.2        
## [10] plotly_4.9.3        gt_0.2.2            cowplot_1.1.0      
## [13] dplyr_1.0.4         ggplot2_3.3.2       HTOparser_1.0.12   
## [16] H5weaver_1.2.0      data.table_1.13.2   rhdf5_2.32.4       
## [19] Matrix_1.2-18       batchreporter_1.1.0 optparse_1.6.6     
## 
## loaded via a namespace (and not attached):
##   [1] Rtsne_0.15           colorspace_1.4-1     deldir_0.1-29       
##   [4] ellipsis_0.3.1       ggridges_0.5.2       spatstat.data_2.1-0 
##   [7] farver_2.0.3         leiden_0.3.3         listenv_0.8.0       
##  [10] DT_0.16              getopt_1.20.3        ggrepel_0.8.2       
##  [13] RSpectra_0.16-0      R.methodsS3_1.8.1    codetools_0.2-16    
##  [16] splines_4.0.3        knitr_1.30           polyclip_1.10-0     
##  [19] ica_1.0-2            cluster_2.1.0        R.oo_1.24.0         
##  [22] png_0.1-7            uwot_0.1.9.9000      shiny_1.5.0         
##  [25] sctransform_0.3.1    compiler_4.0.3       httr_1.4.2          
##  [28] backports_1.1.10     assertthat_0.2.1     fastmap_1.0.1       
##  [31] lazyeval_0.2.2       later_1.1.0.1        htmltools_0.5.1.1   
##  [34] tools_4.0.3          rsvd_1.0.3           igraph_1.2.6        
##  [37] gtable_0.3.0         glue_1.4.2           RANN_2.6.1          
##  [40] reshape2_1.4.4       Rcpp_1.0.5           spatstat_1.64-1     
##  [43] vctrs_0.3.6          nlme_3.1-149         crosstalk_1.1.0.1   
##  [46] lmtest_0.9-38        xfun_0.18            stringr_1.4.0       
##  [49] globals_0.13.1       mime_0.9             miniUI_0.1.1.1      
##  [52] lifecycle_0.2.0      irlba_2.3.3          goftest_1.2-2       
##  [55] ids_1.0.1            MASS_7.3-53          zoo_1.8-8           
##  [58] scales_1.1.1         promises_1.1.1       spatstat.utils_2.1-0
##  [61] parallel_4.0.3       RColorBrewer_1.1-2   yaml_2.2.1          
##  [64] reticulate_1.16      pbapply_1.4-3        gridExtra_2.3       
##  [67] sass_0.3.1           rpart_4.1-15         stringi_1.5.3       
##  [70] checkmate_2.0.0      commonmark_1.7       rlang_0.4.10        
##  [73] pkgconfig_2.0.3      matrixStats_0.57.0   evaluate_0.14       
##  [76] lattice_0.20-41      ROCR_1.0-11          tensor_1.5          
##  [79] Rhdf5lib_1.10.1      labeling_0.4.2       patchwork_1.0.1     
##  [82] htmlwidgets_1.5.3    tidyselect_1.1.0     RcppAnnoy_0.0.16    
##  [85] magrittr_1.5         R6_2.4.1             generics_0.0.2      
##  [88] DBI_1.1.1            mgcv_1.8-33          pillar_1.4.6        
##  [91] withr_2.3.0          fitdistrplus_1.1-1   survival_3.2-7      
##  [94] abind_1.4-5          tibble_3.0.4         crayon_1.3.4        
##  [97] uuid_0.1-4           KernSmooth_2.23-17   rmarkdown_2.4       
## [100] grid_4.0.3           digest_0.6.26        xtable_1.8-4        
## [103] httpuv_1.5.4         R.utils_2.10.1       munsell_0.5.0

Total time elapsed

end_time_all <- Sys.time()
diff_time_all <- end_time_all - start_time_all
time_message_all <- paste0("Elapsed Time: ", 
                       round(diff_time_all, 3),
                       " ", units(diff_time_all))
print(time_message_all)
## [1] "Elapsed Time: 8.08 mins"
stm(time_message_all)
stm("Batch report process complete.")

Return to Top


NGS report v.1.0.0, Lauren Okada